// Anime4K_Restore_CNN_Soft_VL
// Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_Soft_VL.glsl

//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_3
//!USE MulAdd
//!CAPABILITY FP16

#include "../StubDefs.hlsli"


//!TEXTURE
Texture2D INPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D OUTPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex1;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex2;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex3;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex4;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex5;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex6;

//!SAMPLER
//!FILTER POINT
SamplerState sam;


//!PASS 1
//!DESC Conv-4x3x3x3
//!IN INPUT
//!OUT tex1, tex2
//!BLOCK_SIZE 16
//!NUM_THREADS 64

void Pass1(uint2 blockStart, uint3 threadId) {
	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	
    uint i, j;

	MF3 src[4][4];
	[unroll]
	for (i = 0; i <= 2; i += 2) {
		[unroll]
		for (j = 0; j <= 2; j += 2) {
			float2 tpos = (gxy + uint2(i, j)) * inputPt;
			const MF4 sr = INPUT.GatherRed(sam, tpos);
			const MF4 sg = INPUT.GatherGreen(sam, tpos);
			const MF4 sb = INPUT.GatherBlue(sam, tpos);

			// w z
			// x y
			src[i][j] = MF3(sr.w, sg.w, sb.w);
			src[i][j + 1] = MF3(sr.x, sg.x, sb.x);
			src[i + 1][j] = MF3(sr.z, sg.z, sb.z);
			src[i + 1][j + 1] = MF3(sr.y, sg.y, sb.y);
		}
	}

	[unroll]
	for (i = 1; i <= 2; ++i) {
		[unroll]
		for (j = 1; j <= 2; ++j) {
			uint2 destPos = gxy + uint2(i - 1, j - 1);

			if (i != 1 || j != 1) {
				if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) {
					continue;
				}
			}

			MF4 target1 = { -0.040999945, 0.075765304, -0.0911532, -0.10705836 };
			target1 = MulAdd(src[i - 1][j - 1], MF3x4(0.14361712, -0.16690509, 0.37253398, -0.45202538, -0.21331833, -0.32675815, -0.33971128, 0.20261937, -0.20606318, -0.215143, -0.079716705, 0.15640882), target1);
			target1 = MulAdd(src[i - 1][j], MF3x4(-0.17360486, -0.3435545, 0.08199117, 0.56259036, -0.120246716, 0.24312893, -0.021436244, -0.11864853, 0.19452724, 0.106943935, -0.077393375, -0.3503661), target1);
			target1 = MulAdd(src[i - 1][j + 1], MF3x4(-0.072465785, 0.2772823, 0.25493625, 0.3098145, -0.115831695, 0.072458096, -0.014782132, -0.15310249, 0.12178311, -0.015555423, -0.2229811, 0.16469522), target1);
			target1 = MulAdd(src[i][j - 1], MF3x4(-0.18652022, -0.30702665, -0.59921896, 0.079824045, 0.4426619, 0.049343713, 0.44902903, -0.2711445, 0.20470268, -0.029203767, 0.29092675, 0.15562426), target1);
			target1 = MulAdd(src[i][j], MF3x4(-0.21041247, 0.48450592, -0.110547826, 0.3842122, 0.5303875, -0.26512837, 0.19846216, 0.045673862, 0.12773214, -0.05117536, -0.03510946, -0.30123934), target1);
			target1 = MulAdd(src[i][j + 1], MF3x4(0.3186735, 0.052702922, -0.12499774, 0.055628903, -0.16476867, 0.12642322, -0.18314636, 0.018323101, -0.3609603, 0.25649396, 0.3185421, -0.0057759956), target1);
			target1 = MulAdd(src[i + 1][j - 1], MF3x4(0.16603558, -0.09259665, -0.28760567, -0.14319661, 0.12511417, -0.12551902, -0.00070228375, 0.20914114, -0.22466865, 0.1064727, 0.32598525, -0.08596318), target1);
			target1 = MulAdd(src[i + 1][j], MF3x4(-0.03163653, 0.026722813, -0.4361858, -0.21164834, 0.4176763, 0.08203146, 0.35289326, -0.06128859, 0.20506798, -0.07098943, 0.1807802, 0.2658414), target1);
			target1 = MulAdd(src[i + 1][j + 1], MF3x4(-0.09821681, 0.058886815, 0.39192092, -0.06791861, -0.15682612, 0.09503328, -0.23400265, 0.026475023, -0.08800713, -0.043749645, -0.18024494, -0.08045564), target1);

			MF4 target2 = { -0.029148052, -0.03215124, -0.6175828, 0.057135154 };
			target2 = MulAdd(src[i - 1][j - 1], MF3x4(-0.16406488, -0.2506693, -0.15592022, -0.05529256, -0.3997277, -0.229681, -0.07762124, 0.1843808, 0.07895815, 0.14437248, 0.219114, -0.048090722), target2);
			target2 = MulAdd(src[i - 1][j], MF3x4(-0.2150676, 0.09080163, 0.19598733, -0.40578827, -0.33846557, -0.02518622, 0.037079208, 0.20188439, -0.013777575, -0.2369007, -0.30985412, 0.0411912), target2);
			target2 = MulAdd(src[i - 1][j + 1], MF3x4(0.119948365, 0.23014452, -0.14962277, -0.096262485, 0.09625151, 0.2025487, 0.03933539, 0.12268028, -0.24373281, 0.19730613, 0.11634144, 0.12293635), target2);
			target2 = MulAdd(src[i][j - 1], MF3x4(0.08030697, -0.40114692, 0.21532272, 0.20222071, 0.073098, -0.004463858, 0.02820587, -0.18861918, -0.20994501, -0.12444653, -0.23178193, -0.13965288), target2);
			target2 = MulAdd(src[i][j], MF3x4(0.14150894, 0.14563078, 0.697704, 0.20918849, 0.26776335, -0.34291518, 0.06394055, 0.17925078, 0.4165139, -0.042595536, 0.105312675, 0.231854), target2);
			target2 = MulAdd(src[i][j + 1], MF3x4(0.024318576, 0.16668217, 0.0729521, -0.7071404, 0.3121693, 0.37295797, -0.015632952, 0.33763757, 0.00706697, 0.10836652, -0.11132417, 0.292844), target2);
			target2 = MulAdd(src[i + 1][j - 1], MF3x4(-0.14489831, 0.0027769986, -0.24509215, 0.5557927, -0.1104541, 0.005070684, -0.020032275, -0.5642205, 0.16048644, 0.07248175, 0.20387374, -0.38145426), target2);
			target2 = MulAdd(src[i + 1][j], MF3x4(0.33140838, -0.007438425, 0.26074782, 0.15947102, 0.219755, -0.14690271, -0.07412696, -0.24176367, -0.2230114, 0.027256912, -0.11255796, -0.05882673), target2);
			target2 = MulAdd(src[i + 1][j + 1], MF3x4(-0.19712369, 0.003842208, -0.10893768, 0.09047115, -0.10260409, 0.18662766, 0.009733428, 0.0039940844, -0.006444674, -0.15196493, 0.06641555, -0.06169452), target2);

			tex1[destPos] = target1;
			tex2[destPos] = target2;
		}
	}
}

//!PASS 2
//!DESC Conv-4x3x3x16
//!IN tex1, tex2
//!OUT tex3, tex4
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass2(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { 0.012687187, -0.11876551, -0.041985378, -0.10110911 };
	target1 = MulAdd(a1, MF4x4(0.05195995, 0.15220909, -0.24354807, -0.109075695, 0.020483498, -0.14830725, 0.0018816335, -0.0072673927, 0.0649385, 0.046050787, -0.10789607, -0.046609525, -0.11455093, -0.009358115, 0.11280759, 0.18053898), target1);
	target1 = MulAdd(b1, MF4x4(-0.08619698, 0.091353096, -0.16379662, 0.07822936, 0.072919995, 0.1482446, 0.17846228, 0.04639898, -0.18998149, 0.1653338, -0.44187957, -0.010017503, -0.069953404, 0.08784785, -0.16391355, 0.35095468), target1);
	target1 = MulAdd(c1, MF4x4(0.088297926, 0.27259287, 0.013088447, 0.023461785, 0.10037149, -0.017414214, 0.08559885, -0.10822335, 0.10591637, 0.17240539, 0.15749931, 0.026641782, 0.11889612, -0.018095117, -0.08736018, 0.09934933), target1);
	target1 = MulAdd(d1, MF4x4(0.21426749, 0.0800268, -0.19816414, 0.07693414, 0.026270509, -0.11724047, 0.026078718, 0.13080709, 0.12207936, 0.056103867, -0.323923, -0.111454345, 0.059245165, 0.07257926, 0.032195322, 0.27225617), target1);
	target1 = MulAdd(e1, MF4x4(-0.20130268, 0.026809234, -0.0020803472, -0.04394057, -0.1982125, -0.033678252, -0.12881789, 0.0025656687, 0.14193355, -0.2541802, -0.13239717, -0.05983356, -0.029376393, 0.33187667, 0.14438996, -0.21993925), target1);
	target1 = MulAdd(f1, MF4x4(-0.12772562, 0.022498213, 0.24753313, 0.07440761, -0.17564529, 0.09971503, -0.013372, 0.09459552, -0.21597451, -0.40116546, 0.23446435, 0.1515452, 0.050813515, 0.19662157, -0.10604596, -0.24638489), target1);
	target1 = MulAdd(g1, MF4x4(0.23866327, -0.2706382, -0.07480157, 0.03789501, 0.117716484, -0.095995456, -0.0435066, 0.013025061, -0.029759895, -0.036287807, 0.08570493, 0.030151363, 0.18863682, -0.27228612, 0.020479294, -0.07058746), target1);
	target1 = MulAdd(h1, MF4x4(0.0026758043, -0.20750894, 0.2802277, -0.07761428, -0.012089615, -0.112726666, -0.03867965, -0.085082226, 0.034227375, 0.19662802, 0.26272395, 0.036822405, -0.23040786, -0.20173554, 0.07110236, -0.26939383), target1);
	target1 = MulAdd(i1, MF4x4(-0.14012688, -0.067249745, 0.14726773, -0.0070919944, 0.19275497, 0.04460783, 0.18776374, -0.019941995, -0.076159865, 0.002261728, 0.238768, 0.039375026, 0.13200568, -0.023286859, 0.034387972, -0.01827453), target1);
	target1 = MulAdd(a2, MF4x4(-0.0107542025, -0.13001555, 0.06596806, -0.03370635, -0.024291076, 0.10367739, -0.03396605, 0.041960735, 0.16230568, 0.024845246, -0.016806586, 0.22547007, -0.025378102, 0.064547986, -0.2113137, 0.042272836), target1);
	target1 = MulAdd(b2, MF4x4(-0.2219356, -0.049535394, 0.10289468, 0.14175911, 0.013058568, -0.15909089, -0.02546921, 0.11721571, 0.13020545, 0.39660174, -0.07601573, -0.16366366, -0.023935124, 0.06681424, -0.26143414, -0.07485668), target1);
	target1 = MulAdd(c2, MF4x4(0.1405031, -0.0645044, -0.15865614, 0.1829069, -0.22526503, 0.08991175, 0.041972812, -0.012462953, 0.3022753, 0.19457603, 0.022607598, -0.25460255, 0.028327515, 0.14420614, -0.077984214, 0.09278112), target1);
	target1 = MulAdd(d2, MF4x4(0.13224132, 0.13115089, -0.188987, -0.19428022, -0.080641985, 0.20909777, 0.067079, -0.19832124, 0.13150498, 0.04450851, -0.2770351, -0.010381239, 0.32295567, 0.04445836, 0.030657565, 0.020271502), target1);
	target1 = MulAdd(e2, MF4x4(-0.08188993, 0.039709873, 0.16059989, -0.13279189, 0.11389818, -0.071865685, 0.09312801, -0.08816363, -0.65844774, -0.6854379, 0.21431407, 0.597198, -0.3734657, -0.116027676, 0.015932929, -0.0653176), target1);
	target1 = MulAdd(f2, MF4x4(0.24136105, 0.21444799, -0.14235207, 0.08445492, 0.017335927, -0.49877876, -0.06224622, 0.1571534, 0.035594277, 0.059829034, 0.087631516, -0.17090686, -0.005452869, 0.13786094, 0.27586326, 0.046760406), target1);
	target1 = MulAdd(g2, MF4x4(0.095078, 0.30310658, 0.010268592, 0.18540539, -0.20722823, -0.0005848572, -0.06464327, -0.111019135, -0.07837157, -0.12183798, -0.09187498, -0.3368629, -0.08216629, -0.20095807, 0.009563313, 0.024838416), target1);
	target1 = MulAdd(h2, MF4x4(0.28712475, 0.0641969, -0.034764312, 0.13600683, -0.09211094, 0.009699817, -0.001104855, -0.026146285, 0.33425868, -0.16132632, 0.18051304, -0.104004376, 0.20768233, 0.0888418, 0.050057285, -0.020228952), target1);
	target1 = MulAdd(i2, MF4x4(0.11642946, -0.021900529, -0.08910504, 0.15492517, -0.19726521, 0.1434987, -0.24708387, 0.006737377, 0.11353539, -0.15897587, -0.029491093, 0.06002862, -0.09640613, -0.11342702, 0.21375169, 0.0062719737), target1);
	target1 = MulAdd(na1, MF4x4(-0.15513068, -0.3151456, 0.20799752, -0.07449935, -0.09226967, 0.112302735, -0.16211908, -0.37986508, -0.27418482, -0.10445544, 0.21112369, -0.06780466, -0.062341, 0.07758948, -0.012719117, -0.16481343), target1);
	target1 = MulAdd(nb1, MF4x4(0.16382848, 0.14490448, -0.012869055, 0.1804095, -0.05304844, -0.14624795, -0.14816979, -0.17435774, 0.25356865, 0.11435022, 0.19412366, 0.19499794, -0.10189348, 0.023880519, 0.16822465, -0.17454338), target1);
	target1 = MulAdd(nc1, MF4x4(0.04854064, 0.11944563, 0.022984248, -0.0852543, -0.0077684796, -0.044182744, -0.02888099, 0.27452356, -0.07887827, -0.15155658, -0.12841311, -0.21202831, -0.18533322, -0.05852455, 0.0761054, -0.22115342), target1);
	target1 = MulAdd(nd1, MF4x4(-0.21520375, 0.11415518, 0.18909843, -0.16420493, -0.20909967, -0.3257246, 0.29332343, -0.029541709, -0.1679851, 0.14073059, 0.32720464, 0.13311239, -0.0021121972, -0.08773544, -0.045532625, 0.36960867), target1);
	target1 = MulAdd(ne1, MF4x4(0.58407414, -0.23632582, -0.16739567, 0.264173, 0.09584864, 0.18455075, 0.20051196, -0.04616608, 0.13441175, -0.0055764276, -0.08625195, 0.097847305, 0.19565724, -0.12183587, -0.11488796, 0.2520169), target1);
	target1 = MulAdd(nf1, MF4x4(0.01584208, -0.31471413, 0.017104283, 0.0682452, 0.18728764, 0.042960413, 0.06437809, -0.14483811, 0.13882554, 0.016576322, -0.029599546, 0.034904055, -0.20939542, -0.10213055, 0.08821727, 0.0030552552), target1);
	target1 = MulAdd(ng1, MF4x4(-0.2973797, 0.15791039, 0.10811437, -0.07947077, -0.26328024, -0.061920475, 0.12498813, 0.100570425, -0.018922925, 0.002256239, -0.094379805, -0.032315314, 0.48677605, -0.04879864, 0.028028104, -0.14557233), target1);
	target1 = MulAdd(nh1, MF4x4(0.016148027, 0.13884154, -0.19554809, -0.006344376, -0.013450252, 0.2581758, 0.10643088, 0.23465036, -0.078438915, -0.099644944, -0.1442203, -0.2285087, 0.33528957, -0.17052084, -0.26595074, 0.14794162), target1);
	target1 = MulAdd(ni1, MF4x4(0.041404825, -0.0813985, -0.19863169, -0.008302881, 0.023570588, -0.043578386, -0.20971186, 0.14654282, 0.048436746, 0.11266723, -0.25812748, -0.03340969, -0.18430679, -0.046258014, -0.007674466, -0.037139155), target1);
	target1 = MulAdd(na2, MF4x4(-0.060693484, -0.08285047, 0.06638212, 0.18479855, 0.11099276, -0.14470962, 0.16915078, 0.32247669, -0.10845523, 0.0027510398, -0.014941873, -0.15779859, 0.051481526, -0.14748912, 0.12125527, -0.059839584), target1);
	target1 = MulAdd(nb2, MF4x4(0.27571446, 0.01663349, -0.057985745, -0.089736536, -0.09541078, 0.18101417, 0.084854685, 0.11060913, 0.05631825, 0.066835634, -0.02837782, -0.049748126, -0.050051138, -0.05126488, 0.27121767, 0.06331115), target1);
	target1 = MulAdd(nc2, MF4x4(-0.13630085, -0.03787764, 0.13351586, -0.024081819, 0.10403757, -0.0034796793, -0.04838045, -0.064052396, -0.34672704, -0.06271465, -0.024577484, -0.13450806, -0.013759927, 0.11706738, 0.07913658, -0.016639082), target1);
	target1 = MulAdd(nd2, MF4x4(-0.023730129, 0.020174952, 0.048988737, -0.013395666, 0.0073305597, 0.059409764, -0.27721968, 0.13349204, -0.022947624, 0.112007216, -0.008175606, -0.14903043, -0.35755506, -0.02145208, -0.021762518, -0.17889674), target1);
	target1 = MulAdd(ne2, MF4x4(0.19315337, 0.16287236, -0.07667863, -0.020898499, -0.021058874, -0.20849414, -0.3571716, -0.13001479, 0.44977963, 0.016706442, -0.03471178, 0.35189477, 0.3050666, -0.019236205, 0.16278796, 0.3093703), target1);
	target1 = MulAdd(nf2, MF4x4(-0.1507458, -0.13747548, -0.05822537, 0.16035356, -0.08386089, -0.03476887, -0.0022021863, -0.032772254, 0.17572841, 0.004200287, 0.045312192, 0.27265742, -0.037853006, -0.056344658, -0.3095155, 0.15215549), target1);
	target1 = MulAdd(ng2, MF4x4(0.11428048, -0.19523771, 0.016499955, -0.03625986, 0.15670861, -0.077859454, -0.059640404, 0.023970904, -0.009806148, 0.0904747, -0.006978744, 0.15938658, 0.030886533, 0.13507655, -0.002613293, -0.1335748), target1);
	target1 = MulAdd(nh2, MF4x4(-0.20070468, 0.06281564, -0.026250493, -0.042895693, -0.06574456, 0.10412931, 0.12061968, -0.0750467, -0.10865931, -0.05715226, -0.022071969, 0.02608941, -0.21416737, -0.18582128, -0.091236554, -0.044943426), target1);
	target1 = MulAdd(ni2, MF4x4(-0.057988428, 0.21430638, -0.17991407, -0.051662743, 0.060244065, -0.021494022, -0.018070806, -0.09278776, -0.011404125, 0.064091586, 0.12852973, -0.16610947, 0.08740408, 0.045517463, -0.27932477, 0.11050971), target1);
	
	MF4 target2 = { 0.010324113, -0.01262194, 0.0762259, -0.014071781 };
	target2 = MulAdd(a1, MF4x4(-0.07579397, 0.008718031, 0.03874428, -0.022123579, 0.064964466, -0.27502275, -0.0053009577, 0.11669645, 0.007708085, 0.009340055, -0.13001843, -0.03758108, -0.07045759, -0.08749642, -0.21329811, 0.13205966), target2);
	target2 = MulAdd(b1, MF4x4(-0.14087188, -0.12068241, 0.046639618, 0.05115712, 0.108357444, -0.05040456, 0.03280633, 0.09336891, -0.055509757, -0.036777936, 0.043575723, -0.041975956, -0.17782387, -0.12977566, -0.0736514, 0.17304243), target2);
	target2 = MulAdd(c1, MF4x4(-0.2638534, 0.0385968, 0.14743716, 0.18057759, -0.036564615, 0.107838504, 0.08324167, 0.13403444, -0.41366392, 0.072824344, -0.013165103, 0.06114856, -0.040475495, -0.14222278, 0.10455181, 0.0021660402), target2);
	target2 = MulAdd(d1, MF4x4(0.30221993, -0.06315301, 0.057081617, -0.020285107, 0.053984016, 0.13086873, -0.30863532, 0.028010197, 0.0070908144, 0.19940577, -0.013766302, -0.039389495, 0.28064504, 0.05970737, 0.074613005, -0.10217121), target2);
	target2 = MulAdd(e1, MF4x4(0.042094592, -0.1725651, 0.3514404, 0.008126955, 0.08739713, 0.081543595, -0.12912413, 0.0854203, 0.28885832, 0.107783586, 0.22996111, 0.13907135, 0.071920335, -0.15172984, 0.07151959, 0.1406894), target2);
	target2 = MulAdd(f1, MF4x4(-0.1072496, 0.03934067, 0.20014063, 0.051399443, -0.29610988, 0.18659018, -0.17761967, 0.08701774, -0.17493258, -0.08035252, 0.03155133, -0.13986085, 0.023490375, 0.083998375, 0.014006612, 0.03860323), target2);
	target2 = MulAdd(g1, MF4x4(0.09324427, 0.10990628, -0.18758917, 0.0054821614, -0.09425237, 0.1192338, -0.063183226, -0.15047066, 0.15664004, 0.037881903, -0.06762073, 0.09622682, 0.028449943, -0.25338468, -0.18897526, -0.18360007), target2);
	target2 = MulAdd(h1, MF4x4(0.030310342, 0.2083269, -0.04938559, -0.009608655, 0.019751158, 0.12257741, 0.090964966, -0.09864261, 0.058817703, -0.053385522, 0.15931179, -0.10585003, 0.06986225, 0.3435001, -0.33307528, -0.14035752), target2);
	target2 = MulAdd(i1, MF4x4(0.13506691, -0.00015406386, -0.15279713, -0.2290177, 0.019568326, 0.41041428, 0.10566904, -0.08350839, 0.19839814, -0.31052053, -0.04471875, 0.07629561, -0.117245845, 0.19819061, 0.1683647, 0.11896638), target2);
	target2 = MulAdd(a2, MF4x4(0.06920538, 0.2656798, -0.06529862, -0.1695985, -0.21614018, 0.17208195, 0.123307765, -0.061470803, 0.07827313, -0.18543327, 0.0937214, 0.098630935, -0.17667519, -0.01978596, -0.09126346, -0.034487445), target2);
	target2 = MulAdd(b2, MF4x4(0.030779282, -0.24423946, -0.08623178, 0.1490136, 0.029337894, 0.17548573, -0.05990294, -0.29123273, -0.10020608, -0.3527181, -0.105286725, 0.27502912, -0.25686985, 0.18521136, -0.110095225, -0.07999611), target2);
	target2 = MulAdd(c2, MF4x4(-0.03266192, 0.045139533, -0.03275437, -0.13748369, 0.15633966, 0.089048125, -0.07592367, -0.09013536, -0.18907873, 0.08265683, -0.069233745, 0.27151683, -0.0647864, -0.15308899, 0.021954, 0.05528693), target2);
	target2 = MulAdd(d2, MF4x4(0.10284642, -0.14667438, 0.18669777, 0.053000864, -0.12383836, -0.037600834, 0.29438737, 0.04739594, 0.07846367, -0.11676573, -0.048153553, -0.34298027, 0.028358897, 0.119508564, 0.08012271, -0.019992562), target2);
	target2 = MulAdd(e2, MF4x4(-0.22123314, -0.2223458, 0.002969434, -0.07143056, 0.027859585, 0.010600199, 0.056626067, 0.15160584, -0.16350581, -0.044484995, -0.1805076, 0.33351076, 0.073631234, 0.0167081, 0.15704727, 0.107799366), target2);
	target2 = MulAdd(f2, MF4x4(-0.006882137, 0.19744347, 0.041128602, 0.17459555, 0.10376277, -0.12519689, 0.0993647, -0.13044195, 0.10485074, 0.1712284, 0.13369127, 0.24649777, -0.038975652, -0.24550107, 0.19567624, -0.09961197), target2);
	target2 = MulAdd(g2, MF4x4(0.24763626, -0.0902329, 0.21201743, 0.078442305, 0.013261817, -0.019013328, -0.07576136, 0.14993069, -0.24216306, -0.05666454, -0.064632, -0.38150248, 0.14649945, -0.020437164, -0.13821694, -0.026110074), target2);
	target2 = MulAdd(h2, MF4x4(0.21790951, -0.08288076, 0.011415891, -0.1446542, -0.15910968, -0.21221179, -0.06154624, -0.028623452, 0.10872824, 0.17089185, 0.26339474, -0.42544034, 0.095593184, 0.20962211, 0.0034138034, 0.024243662), target2);
	target2 = MulAdd(i2, MF4x4(-0.050784085, 0.06333505, 0.041011192, 0.17474842, 0.14517011, -0.4340653, -0.10313813, 0.12524489, 0.18353751, 0.4589042, -0.037463415, 0.07841999, -0.114173576, -0.10669665, 0.029463472, -0.14393249), target2);
	target2 = MulAdd(na1, MF4x4(0.12771326, -0.06622126, 0.08327681, -0.15113758, -0.114005744, 0.059280578, 0.04071302, -0.11074485, -0.23312584, -0.032968838, 0.13736604, -0.15776984, 0.067029156, 0.0580463, 0.20655325, -0.2112593), target2);
	target2 = MulAdd(nb1, MF4x4(0.16148107, 0.02879793, -0.24918973, 0.009605728, -0.102177374, 0.050518002, -0.00015101423, -0.046602443, 0.5081422, -0.044740383, -0.06243097, 0.076031074, 0.1157983, 0.03965003, 0.109161526, -0.36589798), target2);
	target2 = MulAdd(nc1, MF4x4(-0.018941574, 0.000912917, -0.2585099, 0.13668273, 0.062664494, -0.09246434, -0.14594543, -0.11160076, 0.015663203, -0.02447256, -0.070794076, 0.11807077, 0.12931514, 0.14109722, -0.07506544, -0.012781477), target2);
	target2 = MulAdd(nd1, MF4x4(-0.48816162, 0.16294348, 0.011336221, 0.107038386, -0.01978858, 0.039453425, 0.112853855, 0.007536018, -0.005471479, -0.11315905, 0.032013394, 0.11523904, -0.2504089, 0.04803124, -0.09689627, 0.24372064), target2);
	target2 = MulAdd(ne1, MF4x4(0.61343086, 0.09531598, -0.24803302, 0.23788263, 0.13495958, 0.24733612, 0.1575427, -0.06863399, 0.2341275, -0.15821049, -0.165848, 0.0290172, -0.010136783, 0.04415787, -0.2619951, 0.09987892), target2);
	target2 = MulAdd(nf1, MF4x4(0.19411229, 0.24528526, -0.250216, -0.33602244, 0.17639299, -0.052413136, 0.122578874, 0.028618507, 0.25713214, 0.22033587, -0.19680484, 0.028938502, -0.083384775, -0.06476429, 0.036840588, -0.14297847), target2);
	target2 = MulAdd(ng1, MF4x4(-0.2897587, -0.12176407, 0.19259763, -0.106649496, -0.026704982, -0.036201328, -0.06753124, 0.37967134, -0.20092241, 0.006229027, 0.12085137, -0.09810282, -0.1501556, -0.0099991355, 0.25044358, 0.08538966), target2);
	target2 = MulAdd(nh1, MF4x4(-0.11304407, -0.24147832, 0.21644448, -0.035938095, -0.036439262, -0.042730987, -0.04384442, 0.10325233, -0.32405272, -0.11873838, -0.15075137, -0.036929503, -0.10808143, 0.25799102, 0.13749036, 0.5451476), target2);
	target2 = MulAdd(ni1, MF4x4(-0.24142508, -0.04895773, 0.09022442, 0.2821465, -0.06298706, -0.1807906, 0.02960867, 0.22310257, -0.1915311, 0.2900501, 0.1670845, -0.080343634, 0.25779882, -0.27144584, -0.23575482, -0.14724477), target2);
	target2 = MulAdd(na2, MF4x4(0.020742219, -0.10571064, -0.0010137435, 0.14439318, 0.32805952, -0.027505733, -0.07111945, 0.07043296, -0.09525604, 0.03175366, -0.14633068, -0.15810682, 0.18050082, 0.08191363, 0.07047039, 0.0018573351), target2);
	target2 = MulAdd(nb2, MF4x4(-0.023874652, 0.14996628, 0.11298528, -0.1508891, -0.052415725, -0.02570088, 0.0055150646, 0.16365297, -0.046594325, 0.18095094, 0.09934885, -0.066233225, 0.2404304, -0.112728044, 0.14004207, 0.11369578), target2);
	target2 = MulAdd(nc2, MF4x4(0.14799033, 0.025304591, 0.031008242, 0.03795376, -0.15800071, -0.043169834, 0.10797239, 0.17129694, 0.09674189, -0.11010672, 0.07283912, -0.11063907, 0.108249694, 0.025199141, 0.09162024, -0.1827302), target2);
	target2 = MulAdd(nd2, MF4x4(-0.08983324, 0.07823903, -0.137839, 0.11909572, 0.11996334, -0.05947995, -0.25459376, -0.18159851, 0.044489045, 0.052461334, 0.13674203, 0.12579007, -0.33665392, -0.07313439, -0.013640538, -0.010538632), target2);
	target2 = MulAdd(ne2, MF4x4(0.0884388, -0.10034604, 0.047238693, 0.12025125, -0.16648497, -0.20305477, 0.08240087, -0.17453992, 0.19033237, 0.28438845, -0.32885036, 0.14011146, -0.13389368, -0.012868356, -0.15273216, -0.19119217), target2);
	target2 = MulAdd(nf2, MF4x4(0.09196779, -0.13800567, 0.08842335, -0.18658079, 0.17512907, 0.021311145, -0.06347847, -0.13827331, -0.10689703, -0.1707886, -0.15724367, -0.167876, 0.22493233, 0.3070637, -0.035266686, -0.0068385694), target2);
	target2 = MulAdd(ng2, MF4x4(-0.2739973, 0.07336105, -0.196827, 0.060224827, 0.05752693, -0.014346674, 0.025412507, -0.27530053, 0.27755278, -0.07631679, -0.053861864, 0.113329165, -0.31025892, -0.012681806, 0.06228483, -0.054306302), target2);
	target2 = MulAdd(nh2, MF4x4(-0.16827694, 0.16333361, 0.068389125, 0.24560109, 0.11659498, 0.052896734, -0.020310031, -0.17830387, -0.07551057, -0.01822214, -0.037451357, 0.24607496, -0.2033962, -0.11107965, 0.05005381, 0.13685274), target2);
	target2 = MulAdd(ni2, MF4x4(0.13665263, -0.24541081, 0.0012457973, -0.012630116, -0.09559698, 0.17756529, -0.039300505, -0.044217475, -0.22984356, -0.2294885, 0.104534455, -0.04131095, 0.084843494, 0.038027752, -0.106351435, 0.18853655), target2);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
}

//!PASS 3
//!DESC Conv-4x3x3x16
//!IN tex3, tex4
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass3(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.023354841, 0.0019475977, -0.0705355, -0.08216019 };
	target1 = MulAdd(a1, MF4x4(-0.06961854, 0.06914646, 0.120440066, -0.04889646, -0.012870159, 0.01994181, 0.052958567, -0.14740478, -0.0027199117, -0.004924673, 0.10131955, -0.11496505, -0.06742836, 0.08287776, 0.11206167, -0.021625644), target1);
	target1 = MulAdd(b1, MF4x4(-0.025003597, 0.05389498, 0.14938618, 0.12255602, 0.050963886, 0.16300994, 0.17633909, 0.03229484, 0.2092038, 0.13367431, -0.09538967, 0.1636076, -0.022082182, 0.10898033, 0.0422286, -0.062253885), target1);
	target1 = MulAdd(c1, MF4x4(-0.0018258828, 0.08333001, 0.002765037, -0.022241322, 0.1628206, 0.14671557, 0.3001151, 0.030986495, 0.05225914, -0.04880372, 0.15963705, 0.17972782, 0.055128947, 0.114626616, 0.03460699, -0.07679627), target1);
	target1 = MulAdd(d1, MF4x4(-0.08866054, 0.0882386, 0.13833097, -0.079257324, -0.03060485, 0.049487974, 0.092268504, -0.17009564, 0.021603461, 0.20750603, 0.18884364, -0.10977116, 0.31758478, 0.053426504, 0.093257, 0.14912026), target1);
	target1 = MulAdd(e1, MF4x4(0.13069148, 0.21368778, -0.4405162, -0.009193694, 0.090230525, -0.15897161, -0.005089127, -0.06011075, -0.27336648, -0.021869129, -0.2084852, -0.0850094, -0.10896211, 0.27229342, -0.044210993, -0.03346366), target1);
	target1 = MulAdd(f1, MF4x4(0.05807779, 0.08506817, 0.23984064, 0.12547795, 0.036945213, 0.039088245, -0.10716132, -0.15966031, 0.13548918, 0.07746645, -0.248966, -0.15717135, -0.059498273, 0.0088413125, -0.02828682, -0.021795277), target1);
	target1 = MulAdd(g1, MF4x4(0.013289853, 0.007272393, 0.06875863, -0.053158432, -0.03578172, 0.20148727, 0.1961931, -0.16910668, 0.03259818, 0.054221123, -0.0326064, 0.06493197, 0.053533003, -0.11878436, 0.14398894, -0.17543368), target1);
	target1 = MulAdd(h1, MF4x4(-0.17906332, 0.1111989, 0.047910325, 0.11560207, 0.09790123, -0.2023765, 0.04265116, 0.0075303926, 0.012974969, -0.0853146, -0.04037416, 0.14489946, -0.0716403, -0.055603035, -0.30376709, -0.011667526), target1);
	target1 = MulAdd(i1, MF4x4(-0.053314358, -0.012657763, 0.0077033425, 0.12168191, 0.016371705, 0.11979062, -0.08494259, -0.009617431, 0.1303907, 0.043279216, -0.17285421, 0.15823162, -0.030746695, 0.121796146, 0.13097613, 0.0024783302), target1);
	target1 = MulAdd(a2, MF4x4(-0.11677548, -0.06592395, -0.022185773, 0.0031006308, -0.00906918, -0.0006412884, -0.00083743286, 0.083697535, -0.060518038, 0.14058606, 0.122444086, 0.17866874, 0.02376487, -0.06369968, -0.026537767, 0.21466877), target1);
	target1 = MulAdd(b2, MF4x4(0.12340551, -0.015656117, 0.051990572, 0.04361656, -0.05291406, 0.10119005, 0.17603071, 0.10464767, 0.03288951, 0.091776796, -0.17373918, -0.12871055, 0.10205503, -0.17783496, -0.17020486, -0.09781929), target1);
	target1 = MulAdd(c2, MF4x4(-0.01845568, -0.008877597, 0.14279746, 0.031775143, 0.041680444, 0.08784194, 0.044564564, -0.0011678484, -0.010219994, 0.10472676, 0.046920944, -0.110975444, -0.1197329, -0.11303071, -0.14893234, -0.091113724), target1);
	target1 = MulAdd(d2, MF4x4(-0.03856561, -0.12173735, 0.040876064, 0.13847597, -0.14995924, -0.13332345, 0.18687452, -0.22562599, 0.08920785, -0.0017916666, 0.019448435, 0.2306492, -0.054546747, -0.1465318, -0.10628867, -0.0073827514), target1);
	target1 = MulAdd(e2, MF4x4(0.12689775, 0.11765595, 0.13039489, 0.06940679, 0.2672624, -0.03880143, -0.11693099, -0.05516293, -0.09665274, -0.2583138, 0.22954193, -0.19324702, -0.39629623, -0.35457405, 0.10052407, -0.19756024), target1);
	target1 = MulAdd(f2, MF4x4(-0.06307673, -0.096393906, -0.0075868783, -0.25133502, 0.03436604, -0.008201423, 0.06386583, 0.106548436, 0.014626536, 0.03485315, -0.043418273, -0.1141408, 0.005102567, -0.11701804, -0.01645601, -0.057083), target1);
	target1 = MulAdd(g2, MF4x4(-0.019062268, 0.020416953, -0.08854219, -0.037497565, 0.09449262, -0.09127615, -0.063330196, 0.08736769, -0.12394077, -0.17950213, -0.11101161, 0.16013645, -0.09370585, 0.0047447495, -0.04288296, 0.00314098), target1);
	target1 = MulAdd(h2, MF4x4(-0.08263743, -0.14441489, -0.14886282, -0.05694989, 0.4254853, 0.10864832, 0.26322174, -0.042006254, 0.24269578, -0.053833783, -0.11558995, -0.066605136, -0.064816564, -0.25914803, -0.017624624, 0.0402331), target1);
	target1 = MulAdd(i2, MF4x4(-0.100058846, -0.030422715, -0.19600148, -0.13322774, 0.1796998, 0.087852575, 0.07324559, -0.0047889417, 0.007248384, 0.08930289, 0.09643387, -0.0060126656, 0.16357517, -0.06628222, 0.030618697, 0.097391844), target1);
	target1 = MulAdd(na1, MF4x4(0.09539377, -0.10802722, -0.014952347, 0.1683223, -0.03919409, 0.041155327, -0.012186347, -0.030456683, -0.015024977, 0.061710294, 0.00049987395, 0.27338788, 0.04845922, -0.014114694, -0.06371904, 0.008664), target1);
	target1 = MulAdd(nb1, MF4x4(0.063082814, -0.02755945, -0.15663072, -0.053271208, 0.070173115, 0.038125586, -0.11840675, -0.016337764, -0.07963128, -0.06404943, 0.23033784, -0.007848355, -0.04434174, -0.092422634, -0.013985954, -0.038096108), target1);
	target1 = MulAdd(nc1, MF4x4(0.037121523, -0.020622304, 0.086708754, 0.045878958, -0.13188364, -0.022858748, -0.22411314, -0.08116162, 0.048863005, 0.039260563, -0.04934298, 0.11015131, 0.028177079, 0.025245499, 0.1067935, 0.15324049), target1);
	target1 = MulAdd(nd1, MF4x4(0.068235874, -0.14401375, -0.032677606, 0.02996807, -0.11290208, 0.114133574, -0.09627152, 0.053930115, 0.14560424, -0.15935057, -0.13495773, 0.29710987, -0.23231608, 0.14334352, 0.070753984, -0.08189047), target1);
	target1 = MulAdd(ne1, MF4x4(-0.22378983, -0.09858718, 0.30114698, -0.0048736916, 0.02198528, 0.21444769, -0.11228022, -0.14812283, 0.092372194, 0.1598949, 0.2534843, 0.4932573, -0.16642319, 0.12972073, -0.04147445, -0.09365905), target1);
	target1 = MulAdd(nf1, MF4x4(-0.132199, -0.0798279, -0.18289213, -0.15133642, -0.033057958, 0.007495456, 0.070398286, 0.049111973, -0.03361502, 0.032059964, 0.003850814, 0.22922683, 0.20279214, -0.07350396, 0.27681342, 0.11891455), target1);
	target1 = MulAdd(ng1, MF4x4(-0.095355205, -0.08533997, -0.043466177, 0.03183743, 0.0048090555, -0.07969942, -0.044769235, 0.15350139, 0.06485437, -0.027922742, 0.0850892, 0.00069019396, 0.035737295, 0.20380683, 0.03413393, 0.025630401), target1);
	target1 = MulAdd(nh1, MF4x4(0.26616514, -0.024066277, 0.09220501, 0.09643391, -0.014585791, 0.22894275, -0.053128377, -0.08719867, -0.08819027, 0.01932318, -0.113633566, -0.15435793, 0.10542983, 0.029819246, 0.33675614, -0.059085276), target1);
	target1 = MulAdd(ni1, MF4x4(-0.031325538, 0.040770013, -0.049561024, -0.2095101, -0.09537227, -0.075998954, -0.04323478, -0.05470401, -0.110066876, 0.059249427, -0.042351052, -0.047700178, 0.21932366, -0.12850443, 0.035361454, 0.013699006), target1);
	target1 = MulAdd(na2, MF4x4(-0.08417607, 0.113477044, 0.03574209, 0.007835156, 0.2021717, 0.030678429, 0.19313626, -0.03506592, 0.04233059, -0.08540689, -0.07128929, -0.13245375, -0.08918939, -0.042622462, 0.19011301, -0.18228586), target1);
	target1 = MulAdd(nb2, MF4x4(-0.19981891, -0.16255717, 0.042949058, -0.06921157, 0.279451, -0.11536949, -0.13747527, -0.10020231, -0.013784027, -0.06727259, 0.3556115, 0.08460814, -0.15348805, -0.07692103, -0.018658075, 0.0037634284), target1);
	target1 = MulAdd(nc2, MF4x4(-0.09063814, -0.036312047, 0.13528036, 0.0070792423, 0.11834377, 0.02331524, 0.09386154, 0.07144935, 0.033078104, -0.1397121, 0.09283168, 0.2118868, -0.06313442, 0.032146804, 0.0060367053, 0.005822348), target1);
	target1 = MulAdd(nd2, MF4x4(0.035949346, 0.06469895, -0.0051385965, -0.078584194, 0.43195483, 0.0045206803, -0.24012396, 0.21436183, -0.013394304, -0.04198491, 0.06645506, -0.23869638, -0.02311661, 0.06589808, 0.16800866, -0.21120183), target1);
	target1 = MulAdd(ne2, MF4x4(-0.24937367, -0.042277586, 0.08117994, 0.3105402, -0.26087892, -0.10325264, -0.08689298, 0.0064907144, 0.031937066, 0.09783758, -0.9514562, -0.104631096, 0.27990052, 0.36389935, 0.057687905, 0.14072314), target1);
	target1 = MulAdd(nf2, MF4x4(-0.19865227, 0.09398578, 0.06911146, 0.13077813, 0.024283953, -0.0036808057, -0.036725305, -0.024085987, 0.061556816, 0.0029027078, 0.24621862, 0.112107046, 0.068239614, 0.052718107, 0.20803368, 0.065064415), target1);
	target1 = MulAdd(ng2, MF4x4(-0.055511028, -0.08662344, -0.074801624, -0.021917107, 0.18730342, 0.047116343, 0.14872652, 0.10580926, 0.16962165, 0.16628978, 0.17343876, -0.1697205, 0.047853447, -0.22705628, 0.031780355, -0.09273609), target1);
	target1 = MulAdd(nh2, MF4x4(-0.17306295, -0.067308225, -0.17174196, -0.13221754, -0.24622467, 0.029901514, -0.12799668, -0.04145667, -0.14546, 0.013308366, 0.028113116, 0.1678875, 0.07922657, -0.015584258, 0.17059629, 0.07330948), target1);
	target1 = MulAdd(ni2, MF4x4(-0.09916512, 0.0623665, -0.022458963, 0.061962493, 0.18569344, -0.06590287, 0.111395456, 0.08477448, -0.03609452, 0.024279302, -0.083497405, 0.06459743, -0.22963138, -0.12262581, 0.006980887, -0.06653474), target1);
	
	MF4 target2 = { 0.08523788, 0.052322272, 0.08955637, -0.06945023 };
	target2 = MulAdd(a1, MF4x4(-0.13703531, 0.06135254, -0.05032855, 0.0039429665, -0.05997914, 0.03737832, -0.09703001, -0.08112204, -0.096779875, 0.086732335, 0.03021232, -0.14636067, 0.079296306, 0.006656948, 0.08904937, 0.06196539), target2);
	target2 = MulAdd(b1, MF4x4(-0.26374274, 0.16698441, -0.08554561, 0.03734819, -0.08525629, 0.12257442, 0.015473835, 0.13266069, 0.008439022, -0.05002345, 0.03232084, 0.17349075, 0.014541135, -0.10353582, 0.13339484, -0.13474584), target2);
	target2 = MulAdd(c1, MF4x4(0.05637785, -0.049726896, 0.06597188, 0.0058668824, -0.10623723, 0.13441847, 0.015975956, -0.07811197, 0.05975957, -0.062021587, -0.06533749, 0.083735935, 0.02666556, 0.029904561, -0.0102926055, -0.10931666), target2);
	target2 = MulAdd(d1, MF4x4(-0.22616413, 0.042830274, -0.116208926, -0.053796053, -0.1112898, 0.20703097, -0.34109348, -0.065111674, -0.17255561, 0.16784647, 0.00193431, -0.043237597, -0.02353095, -0.1302526, 0.05119598, 0.01403269), target2);
	target2 = MulAdd(e1, MF4x4(0.086109385, -0.053006437, -0.24992542, 0.007938272, -0.0027849772, 0.09198081, -0.17596659, 0.030577915, -0.31807357, -0.29618275, 0.0056317504, 0.3662508, 0.16753437, -0.12481447, -0.057597708, -0.14973637), target2);
	target2 = MulAdd(f1, MF4x4(-0.14585754, 0.027715279, -0.039035518, 0.11505972, 0.0038059987, -0.20368981, -0.014822689, 0.094012834, -0.20693347, -0.37216228, -0.12690443, 0.2727411, -0.15475404, -0.01948714, -0.12414737, 0.10378582), target2);
	target2 = MulAdd(g1, MF4x4(-0.11750072, 0.051394574, -0.011073509, -0.1100907, -0.1389209, -0.10706716, 0.0017484069, -0.059556484, -0.20038931, 0.24976069, -0.011129469, -0.080446415, 0.19259459, -0.14515446, -0.07275811, 0.039244935), target2);
	target2 = MulAdd(h1, MF4x4(-0.101780266, 0.003889027, 0.010705813, 0.011088775, -0.20406786, -0.009807119, 0.23070864, -0.030722639, -0.012015954, 0.025211284, -0.29246482, 0.04907962, -0.10485314, 0.21213223, 0.15788344, -0.014188987), target2);
	target2 = MulAdd(i1, MF4x4(0.1546438, -0.15895118, 0.010730076, 0.034053337, -0.018741185, -0.008467293, 0.13143812, 0.022905342, -0.27543658, 0.3054419, 0.07025048, 0.29454592, -0.0032350307, 0.01671764, 0.081928045, -0.10051137), target2);
	target2 = MulAdd(a2, MF4x4(-0.014834404, 0.07487839, -0.16554666, -0.04127725, 0.15239598, -0.017607473, 0.09927426, 0.15027349, -0.2073968, 0.041613225, -0.10290223, -0.12565911, 0.022021815, -0.07609557, -0.16338238, 0.04468512), target2);
	target2 = MulAdd(b2, MF4x4(0.01768976, 0.0637369, 0.006542782, -0.0022799321, -0.14728844, -0.058199093, -0.029928437, 0.079634584, 0.095769696, -0.13526416, 0.20718366, -0.10116214, 0.1688786, -0.08906526, 0.020397741, 0.06541649), target2);
	target2 = MulAdd(c2, MF4x4(-0.033067044, 0.10095467, -0.13792777, 0.022673525, -0.012797848, -0.11222105, 0.11443862, 0.04893716, 0.11389547, -0.07337629, 0.21447009, -0.032212257, 0.23070163, -0.18156143, 0.14542435, -0.10207653), target2);
	target2 = MulAdd(d2, MF4x4(-0.22985588, 0.012290226, 0.018557416, -0.064000085, 0.012936774, -0.104329854, -0.0719669, 0.24160251, 0.03716294, -0.093069404, -0.12110873, 0.013251573, -0.12731232, -0.1995954, -0.07679729, 0.06823493), target2);
	target2 = MulAdd(e2, MF4x4(-0.23359679, -0.052702624, -0.08710696, 0.19826421, 0.12880315, 0.19875911, -0.20581602, 0.32980308, -0.14479029, 0.099422045, 0.44737315, 0.13044962, 0.12935589, -0.13621494, 0.14902137, 0.09162335), target2);
	target2 = MulAdd(f2, MF4x4(0.10801082, -0.22644557, 0.035719793, -0.12396268, 0.2906566, 0.119107775, -0.15470679, 0.17997102, -0.12866725, -0.12695445, -0.06832712, 0.017622665, 0.08215481, 0.065239124, -0.1256659, -0.06811625), target2);
	target2 = MulAdd(g2, MF4x4(-0.097956754, 0.09383762, -0.19813508, 0.0035260199, -0.14278924, 0.0660843, 0.19110036, 0.11025648, 0.15489757, 0.011157471, -0.16014035, -0.050144047, 0.0032884583, 0.061513808, -0.03385016, -0.08534137), target2);
	target2 = MulAdd(h2, MF4x4(0.09499595, 0.04162155, -0.26091605, -0.18066265, -0.21523187, -0.036668014, 0.09586408, 0.059850723, -0.10890033, 0.28857672, -0.32993382, 0.05107536, 0.012024929, -0.27968574, 0.15081042, -0.07215633), target2);
	target2 = MulAdd(i2, MF4x4(0.15673614, -0.064684846, -0.13838115, 0.1264376, -0.23772664, 0.11594999, 0.0898036, -0.092647165, 0.26081505, 0.05110054, -0.017965768, 0.06740709, -0.24977967, 0.05645255, -0.08204664, 0.0435078), target2);
	target2 = MulAdd(na1, MF4x4(0.02560865, -0.1613835, 0.05876215, 0.101586774, -0.00058163394, 0.0013674656, 0.039857507, -0.002919488, 0.05573127, -0.04311352, 0.05305971, 0.10097247, 0.036392104, -0.025071293, 0.029137935, -0.08593101), target2);
	target2 = MulAdd(nb1, MF4x4(0.12406646, -0.21399136, 0.05611706, 0.021867402, -0.037916705, 0.05941278, 0.11277805, -0.12387807, 0.008577062, -0.045022104, 0.16465645, -0.07607619, 0.035939474, 0.07221297, -0.13557361, 0.07806311), target2);
	target2 = MulAdd(nc1, MF4x4(-0.19589397, 0.011909766, -0.01258029, -0.065313555, 0.07366803, -0.0812486, 0.115863465, 0.019752543, -0.15854625, 0.11246406, 0.007201303, 0.0008530298, -0.0287012, -0.036224626, 0.059641607, 0.09416462), target2);
	target2 = MulAdd(nd1, MF4x4(0.20361906, -0.20671111, -0.1126041, 0.049152024, 0.17586707, 0.10047246, 0.13149028, -0.16302691, -0.08559989, -0.17756243, -0.0061752857, 0.124775924, 0.020011704, 0.17147969, -0.0003063916, -0.015890911), target2);
	target2 = MulAdd(ne1, MF4x4(0.11051906, 0.13774526, 0.29333818, -0.029932505, -0.07021508, 0.046212852, 0.11793092, -0.081830084, -0.18609521, -0.108229816, -0.044969153, -0.041069634, -0.13936938, 0.11356429, 0.19260931, 0.093210496), target2);
	target2 = MulAdd(nf1, MF4x4(0.010555152, -0.15726428, -0.13187453, -0.12396212, 0.17309372, 0.100884624, 0.11547714, -0.030650318, -0.21877939, -0.0015167049, -0.090150684, 0.029793834, 0.1465573, -0.038805004, -0.033211514, -0.04926991), target2);
	target2 = MulAdd(ng1, MF4x4(0.10250675, -0.030922988, -0.008545946, 0.024706079, 0.105154864, -0.06838902, -0.12627976, 0.032457255, 0.21747419, -0.12865087, -0.056018118, 0.07152061, -0.11214344, -0.029831404, 0.044855718, -0.04316971), target2);
	target2 = MulAdd(nh1, MF4x4(0.12806997, 0.12385188, -0.06831653, -0.015933594, 0.08645126, 0.013043054, -0.19599608, -0.060719345, -0.23076192, 0.19181651, 0.1292978, 0.036317572, -0.061692618, -0.25434494, -0.10012762, 0.06366783), target2);
	target2 = MulAdd(ni1, MF4x4(-0.11098094, 0.034632366, -0.053560194, 0.08499573, 0.20842391, -0.020262053, -0.023394845, 0.048971336, 0.10436084, 0.12614205, 0.035942093, -0.07592917, -0.07455495, -0.012119416, -0.011834865, 0.21032205), target2);
	target2 = MulAdd(na2, MF4x4(-0.00055114913, -0.06662242, -0.009248925, -0.0024843027, -0.22993802, -0.04828541, -0.08667693, -0.093717255, 0.14400347, 0.030130679, -0.01590651, 0.10399553, 0.14478837, -0.11228224, -0.039653912, -0.042144097), target2);
	target2 = MulAdd(nb2, MF4x4(-0.011044514, -0.09870122, -0.24879128, 0.111903004, 0.092567004, 0.06100228, 0.0053522107, 0.065252475, -0.18228072, 0.25602147, -0.2863954, 0.103064165, 0.052214783, -0.017557586, -0.07434391, 0.021111684), target2);
	target2 = MulAdd(nc2, MF4x4(0.04537496, -0.024985183, -0.15247425, -0.0009907635, -0.09677889, 0.09858206, -0.030702371, 0.03539458, -0.029408665, 0.24335481, -0.1918429, 0.08056781, 0.1548214, 0.2850923, -0.15131058, -0.052048493), target2);
	target2 = MulAdd(nd2, MF4x4(0.055409238, -0.13090813, -0.016612396, -0.019183576, -0.18499215, -0.013184845, 0.038750056, 0.10953814, -0.18437819, 0.19183092, -0.09780726, -0.046532292, -0.10841146, -0.17717329, -0.1731886, -0.06741823), target2);
	target2 = MulAdd(ne2, MF4x4(0.27919188, -0.14904179, 0.22850563, -0.17785722, -0.32835802, -0.19134615, 0.32093298, 0.24667856, 0.51687604, -0.59745705, 0.23057328, -0.41411245, -0.4234339, -0.03083826, -0.13972719, 0.1729651), target2);
	target2 = MulAdd(nf2, MF4x4(0.042352367, -0.109207705, -0.31047532, 0.08896513, -0.2187999, -0.117951825, 0.060705405, -0.10287316, 0.013815159, -0.023699438, -0.053614594, 0.09065406, -0.15286967, -0.101803675, 0.019537682, 0.12476822), target2);
	target2 = MulAdd(ng2, MF4x4(0.0016159728, 0.04094818, 0.012745902, -0.051958837, 0.014557628, 0.00061195926, -0.11669799, 0.08763203, -0.27820277, 0.17871988, 0.10634548, 0.05234229, 0.03827577, -0.3117398, 0.027675012, 0.0655132), target2);
	target2 = MulAdd(nh2, MF4x4(-0.0025006514, -0.1457415, 0.053443488, -0.0050932285, 0.01582735, 0.18783967, -0.066718, -0.15485887, -0.039741408, -0.21280284, 0.1502977, 0.09507925, 0.17178543, -0.014238171, -0.35757875, 0.026410697), target2);
	target2 = MulAdd(ni2, MF4x4(-0.19434428, -0.079038315, -0.017264817, -0.04004242, 0.0063378955, 0.027904915, 0.02571677, 0.09895997, -0.036605608, -0.19889063, 0.015920812, -0.014095519, 0.4363826, -0.14143194, 0.015463533, -0.1656284), target2);

	MF3 target3 = MulAdd(e1, MF4x3(0.121882804, 0.055417646, 0.037575886, 0.040015355, 0.10440659, 0.120197006, 0.008896276, 0.07269119, 0.09253319, 0.009000448, -0.033739295, -0.059260685), 0.0);
	target3 = MulAdd(e2, MF4x3(-0.048027042, 0.09210703, 0.123745404, -0.007914943, 0.05483587, 0.054822505, -0.005998682, 0.005822986, 0.009868176, -0.05866792, -0.04236153, -0.022935968), target3);
	target3 = MulAdd(ne1, MF4x3(-0.091270015, -0.033997003, -0.012321896, -0.037983265, -0.078790314, -0.085029654, 0.10656225, 0.0008334142, -0.0041227583, 0.077364065, 0.033960085, 0.029391684), target3);
	target3 = MulAdd(ne2, MF4x3(0.15057671, -0.037442014, -0.037083894, 0.015493511, -0.016119987, -0.027061606, -0.012329675, 0.0060544596, -0.019787522, 0.12182345, 0.11346318, 0.08640806), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}

//!PASS 4
//!DESC Conv-4x3x3x16
//!IN tex1, tex2, tex5
//!OUT tex3, tex4, tex6
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass4(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { 0.013646388, -0.021442367, 0.0045393505, -0.037433166 };
	target1 = MulAdd(a1, MF4x4(0.048841953, -0.010713874, 0.09238948, -0.0789676, -0.093295254, 0.063662216, -0.023454266, -0.06739832, 0.027439933, 0.007399632, -0.03550259, -0.013834889, 0.17168441, 0.06177229, 0.023950668, 0.14574073), target1);
	target1 = MulAdd(b1, MF4x4(0.117296845, -0.07858486, -0.02099164, -0.024150673, -0.11662526, -0.26440877, -0.05449493, -0.13366842, -0.06870016, 0.12457937, 0.25052628, 0.013982828, 0.15127566, -0.031653196, -0.13851896, 0.04148151), target1);
	target1 = MulAdd(c1, MF4x4(0.024360385, -0.31051615, 0.012448293, -0.11265428, 0.06123606, -0.0701936, 0.033618104, -0.064061284, -0.06969811, -0.108838804, 0.014163671, 0.02596177, 0.20071186, -0.0028744373, 0.13663651, -0.05592813), target1);
	target1 = MulAdd(d1, MF4x4(0.13492568, -0.0726796, 0.13431883, -0.085713945, 0.056370113, 0.115660414, -0.14475793, 0.0044200714, 0.027387753, 0.045452334, 0.28178552, 0.017371183, 0.17304336, 0.0582999, 0.14465337, 0.046005037), target1);
	target1 = MulAdd(e1, MF4x4(0.064034574, 0.041531377, 0.08218889, -0.44529077, -0.010563538, -0.14926371, 0.051012456, 0.08209141, 0.24089444, -0.225398, -0.22259372, -0.26353076, -0.1687418, -0.11501685, -0.016655196, -0.09882357), target1);
	target1 = MulAdd(f1, MF4x4(-0.019985389, -0.19189276, -0.104917, -0.11139956, -0.08406414, 0.031484302, -0.082132496, 0.025829919, 0.07512055, 0.31116992, 0.061163265, -0.074850895, -0.091695994, -0.26492774, -0.06617365, 0.06590624), target1);
	target1 = MulAdd(g1, MF4x4(0.1326703, 0.13008863, -0.1659525, -0.058325157, -0.047528613, 0.06777741, 0.06953616, 0.010587038, 0.031675722, -0.08119788, -0.11269768, -0.06225964, -0.26593694, 0.03627298, 0.12866129, 0.17876588), target1);
	target1 = MulAdd(h1, MF4x4(-0.29016155, -0.12549841, -0.050858997, -0.088932805, 0.002237332, 0.01287246, 0.30138868, -0.071756564, -0.061206467, -0.11114371, -0.25731218, -0.11551616, -0.069513, -0.004583348, -0.10647163, 0.01981785), target1);
	target1 = MulAdd(i1, MF4x4(0.16387528, 0.03450354, 0.03422023, -0.014030813, 0.13418834, -0.010909722, -0.00447121, -0.03082622, -0.23983373, -0.020655053, -0.054034587, -0.07133469, 0.21171515, 0.06268651, -0.1738516, -0.15001713), target1);
	target1 = MulAdd(a2, MF4x4(0.040721033, -0.037582736, -0.13819644, -0.123978324, 0.1650318, 0.033942625, 0.17534302, 0.06452234, 0.18384823, 0.0048657497, 0.20220642, -0.0025760103, 0.011163899, 0.027265374, -0.051284578, 0.19202651), target1);
	target1 = MulAdd(b2, MF4x4(-0.057493486, -0.031516504, 0.10835143, -0.040618125, -0.07762303, -0.06787725, 0.025559613, -0.0055560498, -0.0017830619, 0.020185964, -0.06656476, -0.008523214, 0.32331157, -0.21633361, 0.15338033, -0.104042485), target1);
	target1 = MulAdd(c2, MF4x4(-0.18544987, -0.090446, -0.26797467, -0.082941435, -0.15003708, -0.11446041, -0.0394892, 1.1379096e-05, 0.04978554, 0.3350256, 0.032780237, 0.034625802, 0.0596261, 0.045886245, 0.009002243, 0.04746998), target1);
	target1 = MulAdd(d2, MF4x4(-0.17104147, 0.0054165213, 0.09161088, -0.0673989, -0.119282715, -0.09094731, 0.47243354, 0.09914267, -0.13958418, -0.0050379517, 0.14352496, 0.18380567, -0.16128838, 0.08766813, 0.013876981, -0.09808636), target1);
	target1 = MulAdd(e2, MF4x4(0.09617889, 0.045525175, -0.2550057, -0.02874332, 0.2743444, -0.20102581, 0.008461914, 0.16626629, -0.13309516, -0.19307104, 0.15780488, 0.15518525, -0.2790243, 0.056782067, 0.16836968, 0.17771688), target1);
	target1 = MulAdd(f2, MF4x4(-0.10694667, 0.14490083, -0.037976455, 0.013456577, -0.1166783, 0.060722847, 0.07323464, -0.013812333, 0.03234213, 0.50859296, -0.20670377, -0.019631205, -0.022543924, 0.21776745, -0.093769215, 0.12193299), target1);
	target1 = MulAdd(g2, MF4x4(-0.15260598, -0.04798592, -0.02370747, -0.005714705, 0.030857049, -0.16643822, 0.23971851, 0.08117996, -0.069645695, -0.06674784, 0.033509918, 0.06333286, 0.14010383, 0.02218942, -0.036704093, 0.043163314), target1);
	target1 = MulAdd(h2, MF4x4(0.14653306, 0.002759894, 0.10548246, 0.24976018, 0.3212893, -0.07108953, 0.14068738, 0.29437128, -0.020556152, -0.17813908, 0.1989112, 0.12182122, -0.19231579, 0.06547012, -0.032785345, 0.089717634), target1);
	target1 = MulAdd(i2, MF4x4(-0.23632105, -0.027022298, 0.00586518, 0.01836479, -0.2854795, -0.035417695, -0.07586866, 0.0715673, 0.17984483, 0.11210451, 0.032767817, 0.097993985, -0.010899036, 0.15933803, 0.05454052, 0.06768528), target1);
	target1 = MulAdd(na1, MF4x4(-0.017289463, -0.058823984, 0.0807603, 0.32464716, 0.2756627, 0.036061637, -0.034578573, -0.08811335, 0.031841308, 0.11359879, 0.07553143, -0.028648997, 0.057192322, 0.07769366, -0.1998847, -0.06258051), target1);
	target1 = MulAdd(nb1, MF4x4(0.0422091, 0.046305113, 0.028377453, -0.031071126, 0.06866086, 0.1538135, -0.009288249, -0.25543538, 0.07067607, -0.114061736, -0.024740022, -0.11824987, -0.17426041, 0.0028396242, 0.12849464, 0.057790644), target1);
	target1 = MulAdd(nc1, MF4x4(0.057328146, 0.030677445, 0.07496485, 0.07847613, -0.22358766, -0.15659446, -0.18270054, -0.21316889, 0.084770195, 0.013863274, -0.001335942, -0.04027535, -0.15230416, -0.048156176, -0.04614562, 0.089494966), target1);
	target1 = MulAdd(nd1, MF4x4(-0.117369525, 0.026577681, -0.1941765, 0.14904885, -0.16210394, -0.19549404, 0.19999947, 0.37138188, 0.14809363, -0.05078633, -0.092692114, -0.08533522, 0.12769112, 0.017061725, 0.104464866, -0.026744602), target1);
	target1 = MulAdd(ne1, MF4x4(0.0880251, -0.005333869, -0.10327546, 0.30419552, 0.107773595, 0.02335926, -0.19014318, 0.19670166, -0.09443473, 0.10621109, 0.36843884, 0.13197622, 0.24537645, 0.4032842, 0.21791221, 0.08400414), target1);
	target1 = MulAdd(nf1, MF4x4(0.06408587, 0.15366535, 0.042582024, 0.15629277, 0.028716238, -0.013479061, -0.23052843, -0.2992272, -0.050045617, -0.27255702, -0.038093377, 0.0031149297, -0.05625518, 0.52598304, -0.0845234, -0.09116851), target1);
	target1 = MulAdd(ng1, MF4x4(0.02294159, -0.011902539, 0.00079296535, 0.030631313, 0.02114366, 0.082455896, 0.09450867, -0.08027284, 0.042443607, 0.15427661, 0.11882799, -0.040319934, 0.23706424, -0.107808165, -0.1730313, -0.06340064), target1);
	target1 = MulAdd(nh1, MF4x4(0.2645207, 0.002157867, -0.095794424, 0.1141035, 0.08255855, -0.06977906, -0.04348005, 0.27864936, -0.1197219, 0.015997604, 0.09500464, -0.0010631803, 0.07198933, -0.053128377, 0.02176274, -0.001298847), target1);
	target1 = MulAdd(ni1, MF4x4(-0.045475803, 0.03626341, -0.00891833, 0.17907676, -0.2810277, 0.13725498, -0.02413441, -0.08605496, 0.08306595, -0.012227401, -0.0070282067, -0.019027572, -0.13443586, -0.041331865, 0.029120144, -0.00490357), target1);
	target1 = MulAdd(na2, MF4x4(-0.13398282, 0.06475972, 0.2528711, 0.02553969, -0.13428321, -0.03931247, 0.11360386, -0.18912545, -0.3725821, -0.018747944, -0.20893294, -0.012743096, 0.07444533, -0.15381604, 0.29776138, 0.10601149), target1);
	target1 = MulAdd(nb2, MF4x4(-0.21793252, 0.07817356, -0.109576665, 0.19185133, -0.072846025, 0.04960289, -0.07506936, 0.12839878, -0.0061091883, 0.093669325, 0.009295678, 0.03780657, -0.10901407, 0.1375137, -0.0745914, 0.1468883), target1);
	target1 = MulAdd(nc2, MF4x4(0.10739044, 0.30611086, 0.1585515, 0.07903283, 0.05612715, -0.0061900485, 0.13646163, 0.15230569, 0.036846787, -0.15846778, -0.18765065, 0.06611226, -0.07209187, 0.056037188, 0.04302953, -0.03887873), target1);
	target1 = MulAdd(nd2, MF4x4(0.05618538, -0.072312586, -0.018046018, 0.049542785, -0.033638306, -0.035169322, -0.25882784, -0.036425237, 0.43763217, -0.07049093, 0.08085481, 0.013634128, -0.2701461, -0.13007875, 0.09603447, 0.2479431), target1);
	target1 = MulAdd(ne2, MF4x4(-0.02283992, -0.24593964, 0.04616348, 0.023422526, -0.20994014, 0.064769074, -0.07680045, -0.30547765, 0.1518723, 0.31953967, -0.12841515, -0.19525428, -0.0076093865, -0.112106465, -0.04573789, -0.04834478), target1);
	target1 = MulAdd(nf2, MF4x4(-0.008045419, -0.20285496, 0.15290824, 0.036240693, 0.11959966, -0.15712506, 0.096806675, 0.008780234, -0.19716795, -0.3824029, 0.1376541, 0.13325086, -0.103316806, -0.31788048, -0.071698256, -0.25901568), target1);
	target1 = MulAdd(ng2, MF4x4(0.13714787, 0.020738773, 0.13716534, 0.12359137, -0.038154524, 0.053202964, -0.12023912, 0.09011213, -0.012448548, -0.026505312, -0.11293235, 0.10613704, -0.39916727, 0.041521315, 0.10659441, 0.027749784), target1);
	target1 = MulAdd(nh2, MF4x4(-0.26475835, 0.044597875, -0.31229413, -0.17121075, -0.21795374, -0.009583571, -0.13428004, -0.30734754, -0.017038794, 0.113667324, -0.1516075, 0.06525228, -0.13789397, -0.05770066, -0.016166758, -0.29457557), target1);
	target1 = MulAdd(ni2, MF4x4(0.054183286, 0.022085225, 0.086794585, 0.10968018, 0.1276148, 0.05739452, 0.08860957, -0.08131373, -0.081570424, -0.107991874, -0.03724999, 0.000843539, 0.20231429, -0.123543546, -0.19073018, -0.28328305), target1);
	
	MF4 target2 = { -0.002022359, -0.007333954, -0.038140967, -0.03819673 };
	target2 = MulAdd(a1, MF4x4(-0.13948695, 0.016643738, 0.08168136, 0.02315663, 0.017184775, 0.11487715, 0.05770107, 0.010102888, 0.04955321, -0.045132335, -0.05731744, -0.05798246, 0.2245112, 0.17406365, 0.08979801, -0.10607952), target2);
	target2 = MulAdd(b1, MF4x4(0.2812785, 0.022830509, 0.15164222, 0.13460225, 0.22263442, 0.2558749, -0.122489706, 0.10409658, 0.023308244, -0.19583783, -0.007824269, 0.06256542, 0.11161938, 0.14878923, 0.30865005, 0.08962341), target2);
	target2 = MulAdd(c1, MF4x4(-0.20843887, 0.012371968, -0.008279775, -0.042467568, -0.13022369, 0.056743186, -0.018389069, 0.13964763, -0.03361555, -0.053087234, 0.012521351, 0.0209293, 0.015771557, 0.11718523, 0.010176676, 0.021708367), target2);
	target2 = MulAdd(d1, MF4x4(-0.14373007, -0.114338934, -0.09077395, -0.11040866, 0.055298284, 0.022516333, 0.18901019, -0.05640152, -0.1413198, -0.08748339, -0.029985962, 0.00712751, -0.071436934, -0.18909407, 0.173448, 0.053675048), target2);
	target2 = MulAdd(e1, MF4x4(-0.023129769, 0.42883545, -0.18110612, 0.24296297, -0.02441117, 0.18108079, -0.12298153, -0.19192219, -0.14139178, -0.069563635, 0.1524624, -0.17755614, -0.248875, 0.015161957, -0.16541803, -0.17773613), target2);
	target2 = MulAdd(f1, MF4x4(-0.065477535, -0.113195814, -0.08284894, 0.11679537, 0.028445985, -0.026559185, -0.007267581, 0.14052133, 0.14847197, -0.040276285, -0.038166475, -0.030452784, -0.15184602, -0.22223297, 0.113732725, 0.11163395), target2);
	target2 = MulAdd(g1, MF4x4(0.04990171, 0.08493333, 0.08668171, 0.14610586, -0.010766879, -0.05690133, 0.10706113, 0.13667485, 0.044783257, 0.029695645, -0.101674624, -0.02023205, 0.031889528, 0.14293797, 0.08712652, 0.08716896), target2);
	target2 = MulAdd(h1, MF4x4(-0.21387868, -0.21650635, 0.2743992, -0.048781313, -0.027735803, -0.1543507, 0.11343657, -0.18251626, 0.15225998, 0.13158897, -0.41056108, 0.102582805, -0.09181491, -0.0042975787, 0.056065407, -0.16961528), target2);
	target2 = MulAdd(i1, MF4x4(0.08966051, 0.09331515, -0.085415326, -0.022695992, 0.009771476, -0.07143986, 0.0590329, 0.07347928, -0.09033658, -0.06805735, -0.20129825, 0.017873045, 0.16908158, 0.014213783, 0.112663984, 0.10048714), target2);
	target2 = MulAdd(a2, MF4x4(0.115590535, 0.08364541, 0.00864431, -0.094349444, -0.11073411, 0.05337711, 0.055587426, 0.12131219, -0.04710173, -0.046455074, 0.110379905, 0.25445566, 0.15154606, 0.04483541, 0.08708686, 0.113456205), target2);
	target2 = MulAdd(b2, MF4x4(-0.014296297, 0.24858733, 0.05035193, -0.09225393, 0.034625243, 0.06219943, 0.19825043, 0.04673499, -0.4083363, -0.39954248, -0.08299408, 0.048756655, 0.09862206, 0.01588621, 0.0070629907, 0.04173666), target2);
	target2 = MulAdd(c2, MF4x4(0.17356622, 0.1484559, -0.10054033, 0.013332302, 0.15200937, 0.08985606, -0.031668343, -0.026007611, -0.16339104, 0.054744486, 0.07386605, -0.033910174, -0.0018002358, -0.02968911, 0.054931052, 0.09970459), target2);
	target2 = MulAdd(d2, MF4x4(-0.07330346, 0.05938635, 0.01911963, -0.09856661, -0.081916444, -0.046957035, -0.043849826, 0.09572135, -0.13621825, 0.034347896, -0.21189907, 0.10592239, -0.060592845, 0.09957844, 0.050621815, -0.07447668), target2);
	target2 = MulAdd(e2, MF4x4(0.044731334, -0.13406886, -0.04138754, -0.06764551, -0.018899845, 0.35320804, -0.10959127, 0.17435175, -0.17941645, -0.30889434, 0.10573405, 0.0319751, -0.15677677, 0.08164649, 0.16559398, -0.08152387), target2);
	target2 = MulAdd(f2, MF4x4(0.057760764, -0.12145107, 0.06889264, -0.30627275, 0.011501002, -0.080296256, -0.18067095, 0.10592384, 0.12884894, -0.18973115, 0.18740658, 0.28362688, 0.12934786, -0.010292026, 0.0559999, 0.079962276), target2);
	target2 = MulAdd(g2, MF4x4(0.048659086, -0.006250348, -0.041242067, -0.12078197, -0.07152629, 0.05699244, 0.0011704164, -0.023007339, 0.07814492, 0.02546712, -0.08957218, -0.036925297, -0.03383498, 0.12583385, 0.12207602, 0.03910942), target2);
	target2 = MulAdd(h2, MF4x4(0.26151723, 0.23277281, -0.021892069, 0.052827276, 0.18268764, 0.28595275, -0.20529993, 0.19892794, 0.0038986763, 0.114547804, -0.020574905, 0.02405073, 0.11713121, 0.04491106, -0.07557327, 0.014374293), target2);
	target2 = MulAdd(i2, MF4x4(-0.14276731, -0.06600894, -0.029757235, -0.099975966, 0.023050314, -0.07662015, -0.11542214, 0.087981045, 0.070319094, 0.12462511, 0.008152087, 0.12613884, -0.07071591, 0.0063393894, 0.08699723, -0.0242523), target2);
	target2 = MulAdd(na1, MF4x4(0.035586607, -0.26826563, -0.10145326, -0.002177148, 0.022144236, -0.117452875, 0.021346297, 0.051908135, -0.022425706, 0.067299, 0.09406446, 0.078294896, 0.014900606, -0.05468236, 0.07241715, 0.061000507), target2);
	target2 = MulAdd(nb1, MF4x4(-0.184133, 0.06229474, -0.13819578, -0.025011744, -0.01868356, -0.18940887, 0.092631504, -0.092806384, 0.0035951615, 0.11777577, 0.028149817, 0.0049419673, 0.22230826, 0.06337655, -0.20004818, -0.20937593), target2);
	target2 = MulAdd(nc1, MF4x4(0.13852163, -0.094492316, -0.040309057, 0.10771662, 0.18963522, 0.08687606, -0.20030232, -0.082126215, 0.012181411, 0.044306785, -0.036970526, 0.04403363, 0.07911973, 0.0021176056, 0.26944208, -0.06657045), target2);
	target2 = MulAdd(nd1, MF4x4(0.027229607, 0.12410596, 0.04348171, 0.0019921176, 0.088246435, -0.02828269, -0.26499373, -0.12566662, 0.025947344, -0.0078000715, 0.058063716, -0.0032702687, 0.0059978673, -0.04860002, 0.027650384, -0.23394564), target2);
	target2 = MulAdd(ne1, MF4x4(0.07892762, -0.13300626, 0.46678603, -0.033239357, -0.12306804, -0.079602, 0.20534003, 0.23873802, -0.035643574, 0.059950788, -0.26559883, 0.12206408, 0.25408483, 0.029933078, 0.32081822, 0.033947676), target2);
	target2 = MulAdd(nf1, MF4x4(-0.06847802, -0.017930118, -0.12299636, -0.12987946, 0.09267518, -0.0009083275, -0.035390552, -0.15379669, -0.1132433, -0.036670692, -0.08342377, 0.015636675, 0.022590527, 0.10533322, 0.0389949, -0.059033744), target2);
	target2 = MulAdd(ng1, MF4x4(-0.041753534, -0.014428097, 0.06999257, -3.546234e-05, -0.033465035, -0.040709455, 0.13118082, -0.21016484, -0.07846085, -0.030885663, 0.06934681, 0.12725256, -0.023784902, -0.13373604, -0.015261479, 0.05234782), target2);
	target2 = MulAdd(nh1, MF4x4(0.13798563, 0.12757827, -0.26978776, 0.102494285, 0.13285922, 0.35432795, -0.11997128, 0.17108068, -0.12235328, -0.24582328, 0.26962712, -0.086760186, 0.010127441, 0.08048835, 0.047505867, 0.19991067), target2);
	target2 = MulAdd(ni1, MF4x4(0.03584222, -0.13433793, -0.044629525, -0.0010440781, -0.0033084434, -0.026725832, -0.05386642, -0.13612603, 0.10066015, 0.10499841, 0.031767137, -0.04550841, -0.09391546, 0.1454157, -0.26962402, 0.21015608), target2);
	target2 = MulAdd(na2, MF4x4(-0.21956864, -0.13502425, -0.02126954, 0.059263993, -0.13461533, -0.04001395, -0.0924258, -0.069165014, 0.22019973, 0.003270619, 0.022072528, -0.14173602, 0.0028843523, -0.13784003, -0.061057515, -0.0049253837), target2);
	target2 = MulAdd(nb2, MF4x4(-0.0011410525, -0.16098002, -0.12883134, 0.018262507, 0.001481578, 0.19514659, -0.13703239, 0.096059754, 0.34194204, 0.13983466, 0.14021507, 0.011405113, -0.11303146, -0.17050214, -0.06992079, -0.05566986), target2);
	target2 = MulAdd(nc2, MF4x4(-0.12307941, -0.02192472, 0.13193923, -0.061640862, -0.16841564, -0.0822524, 0.10141759, 0.02139286, 0.1599039, -0.050632223, 0.16702358, 0.111514546, 0.02397393, 0.037606515, 0.017971672, -0.048641708), target2);
	target2 = MulAdd(nd2, MF4x4(-0.02697617, -0.08579184, -0.28045088, 0.05262136, -0.059576314, 0.107535526, -0.06188862, 0.0010509328, -0.18178311, -0.17288832, 0.20703638, 0.083048366, 0.03859681, -0.07548898, 0.011605782, -0.021842534), target2);
	target2 = MulAdd(ne2, MF4x4(0.13198483, 0.37200937, -0.0896539, 0.12450637, 0.037202634, 0.035985112, 0.16579124, -0.08967905, -0.24341385, 0.32482424, -0.3037812, -0.007154969, -0.007152382, -0.017435173, 0.12662841, -0.090513505), target2);
	target2 = MulAdd(nf2, MF4x4(-0.014726027, 0.08394915, -0.02100581, 0.24882795, -0.023793869, -0.006450114, 0.17093314, -0.06994153, -0.08689907, 0.113542505, -0.053211495, -0.1780173, 0.030043352, 0.2500714, -0.026940798, -0.0069258413), target2);
	target2 = MulAdd(ng2, MF4x4(0.037078895, -0.03033529, -0.066851325, 0.14718252, 0.066372745, 0.028897487, -0.036055963, 0.035399746, 0.06733992, 0.21021596, -0.18314466, -0.027192699, 0.020213274, -0.17751546, -0.050674338, -0.09382659), target2);
	target2 = MulAdd(nh2, MF4x4(-0.14761917, -0.22166072, 0.033172436, -0.21982265, -0.09172891, -0.20794454, 0.1738752, -0.13685037, 0.10981111, -0.23169234, 0.053787973, 0.12001196, -0.038242023, -0.047124114, 0.22503005, 0.1015142), target2);
	target2 = MulAdd(ni2, MF4x4(0.021231879, -0.015423476, 0.058986407, 0.032002006, -0.029305007, 0.008933183, 0.10777483, -0.112574644, -0.023935415, -0.06604598, 0.053859934, -0.08354717, 0.13703763, -0.078382134, 0.12914242, -0.022056468), target2);

	MF3 target3 = tex5.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.19254518, 0.009179287, 0.023821035, 0.020269603, 0.025629226, 0.040180814, -0.025135614, -0.07785793, -0.099851295, -0.122886, 0.03322616, 0.0509256), target3);
	target3 = MulAdd(e2, MF4x3(0.060054794, 0.053996198, 0.047226787, 0.038959846, -0.025839888, -0.030583512, -0.034999896, 0.011966571, -0.011057454, 0.05765179, -0.041760337, -0.0694113), target3);
	target3 = MulAdd(ne1, MF4x3(-0.20393562, -0.0055942894, -0.02089636, 0.14781304, -0.01954523, -0.0746086, 0.071556985, 0.07512172, 0.067927115, 0.084076844, -0.0561336, -0.06856403), target3);
	target3 = MulAdd(ne2, MF4x3(-0.039552618, -0.04448951, -0.04170605, -0.00886809, 0.06708884, 0.07120977, 0.04834384, -0.10599933, -0.11024835, -0.015948117, 0.084044695, 0.10778199), target3);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
	tex6[gxy] = MF4(target3, 1);
}

//!PASS 5
//!DESC Conv-4x3x3x16
//!IN tex3, tex4, tex6
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass5(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.009669773, 0.036289547, -0.050454646, 0.051479716 };
	target1 = MulAdd(a1, MF4x4(0.050738923, 0.15003614, -0.18880141, 0.16791905, 0.16549185, -0.26726744, -0.12813666, -0.021510791, 0.070805945, 0.043350577, 0.0035756908, 0.11776675, -0.01824196, 0.12618026, 0.07424072, 0.032886628), target1);
	target1 = MulAdd(b1, MF4x4(-0.11678059, 0.0565686, 0.04392921, -0.27621672, 0.2116695, 0.038044345, -0.015018062, -0.028636303, 0.049744565, -0.12935996, 0.027176194, -0.13208814, -0.21195693, 0.08980974, 0.013893243, -0.018403184), target1);
	target1 = MulAdd(c1, MF4x4(0.3214697, -0.03143518, 0.19927292, 0.12566878, 0.16190828, 0.11784847, 0.09943727, 0.11755882, 0.017959306, -0.064603634, -0.14054321, -0.11917774, 0.0056958874, 0.06461699, 0.104604125, 0.021947173), target1);
	target1 = MulAdd(d1, MF4x4(-0.24738057, -0.034892898, -0.03364674, 0.017340986, 0.02933764, -0.08090866, -0.034651175, -0.17391174, 0.08536477, -0.17446008, 0.22706915, -0.10555482, 0.0877744, 0.0681237, -0.035909466, -0.10355238), target1);
	target1 = MulAdd(e1, MF4x4(-0.090646185, -0.12971672, -0.14531808, -0.060838025, 0.24902023, 0.1310588, 0.18602785, 0.21283495, -0.32160765, -0.070119165, -0.10350057, 0.19260244, -0.2610542, -0.3030521, 0.08432348, -0.22286619), target1);
	target1 = MulAdd(f1, MF4x4(0.28333843, -0.053968847, 0.08344997, 0.19987041, 0.22163449, 0.22161576, 0.0030572868, 0.10848695, -0.20529847, 0.08406883, -0.07130339, 0.09987656, 0.29774663, -0.08768785, 0.15567012, -0.010313759), target1);
	target1 = MulAdd(g1, MF4x4(-0.1260916, -0.071901485, -0.30566844, 0.19393384, -0.05133266, 0.07868844, -0.24817581, 0.055521224, 0.23277187, 0.16324161, 0.07110341, -0.042626668, 0.052509766, -0.014292625, -0.019677468, 0.041733738), target1);
	target1 = MulAdd(h1, MF4x4(-0.04264262, -0.06528029, 0.0013520801, -0.02140956, 0.27304867, -0.029477939, -0.1859993, 0.01418354, 0.07256604, 0.14302284, 0.03309569, -0.15932149, 0.01500576, -0.053860538, 0.1131707, -0.06272606), target1);
	target1 = MulAdd(i1, MF4x4(-0.0400483, -0.030188695, -0.108427785, 0.057873204, 0.42774406, -0.11353873, 0.110134825, 0.052191462, 0.00087113964, 0.040683694, 0.100507155, -0.16746339, -0.26971558, 0.06506685, -0.20950548, 0.040783025), target1);
	target1 = MulAdd(a2, MF4x4(0.11394146, -0.10693933, 0.2377026, -0.03783948, -0.16496852, 0.046675198, -0.23396324, 0.05696911, -0.02770668, 0.12922443, -0.093586415, 0.102305606, 0.0040032533, -0.038440734, -0.0035825048, -0.22108772), target1);
	target1 = MulAdd(b2, MF4x4(0.17577791, -0.024538597, -0.19877498, -0.14544973, 0.16614193, -0.3279891, 0.14678721, -0.16355143, -0.012954231, 0.20982395, 0.044255227, 0.087878115, 0.11289659, -0.26981032, -0.10789584, 0.24094439), target1);
	target1 = MulAdd(c2, MF4x4(0.0041394173, -0.0937936, 0.15251775, 0.1026978, -0.01999847, -0.02865502, 0.16765144, -0.17490439, -0.016996933, 0.03891808, -0.01858217, -0.106255606, 0.027496144, -0.14120618, 0.023483312, -0.08291959), target1);
	target1 = MulAdd(d2, MF4x4(0.060642462, -0.2957824, 0.33968493, -0.04501478, -0.14999421, -0.0067213452, -0.018236576, 0.01627547, -0.07771579, 0.0124932695, -0.11797959, -0.090979554, 0.0096479915, 0.021336472, -0.07794724, 0.030138575), target1);
	target1 = MulAdd(e2, MF4x4(-0.091704845, -0.20800348, -0.22158638, 0.048748583, 0.15139692, -0.2832814, 0.09610812, 0.41077513, 0.0007106381, -0.14465855, 0.0056652213, 0.031696238, -0.03384328, 0.1940933, 0.19262145, 0.014331562), target1);
	target1 = MulAdd(f2, MF4x4(-0.16637586, -0.22008398, 0.102937706, 0.15260033, 0.039856806, -0.21082906, -0.19694057, 0.0712475, 0.015049883, 0.17320138, 0.06505415, -0.020279367, -0.018576574, 0.201407, -0.08108244, 0.04151909), target1);
	target1 = MulAdd(g2, MF4x4(-0.12496581, 0.107817784, 0.10645319, 0.035113968, 0.0166165, 0.1316661, -0.045253787, -0.03863719, 0.09126881, 0.07553792, -0.029150097, -0.07629157, -0.17978054, -0.27080613, -0.028408276, -0.15366451), target1);
	target1 = MulAdd(h2, MF4x4(0.081859134, -0.11599677, 0.027383117, 0.092724435, 0.059302155, 0.10008954, -0.12217131, 0.07471211, -0.20396213, -0.040741358, 0.118772194, -0.21725504, 0.099645875, 0.09691941, -0.07696025, -0.016445495), target1);
	target1 = MulAdd(i2, MF4x4(-0.18712623, -0.14458412, 0.03693652, 0.014525352, -0.09607279, -0.19400409, 0.032149505, 0.07106094, 0.051436905, -0.07765334, 0.017043818, 0.17777587, 0.05274306, 0.0062209824, -0.080005355, 0.026041988), target1);
	target1 = MulAdd(na1, MF4x4(-0.090594456, -0.041637532, 0.10346829, -0.09393943, 0.027663473, 0.20729685, -0.011156861, 0.021863503, 0.04781304, -0.039483577, -0.092933334, -0.25187445, 0.033062164, 0.010756357, -0.13035728, -0.008321023), target1);
	target1 = MulAdd(nb1, MF4x4(0.07772912, 0.010776647, -0.018709056, 0.25634038, 0.00906326, 0.21411708, 0.122652486, 0.07725616, 0.15266491, 0.1274286, 0.10400329, 0.20354506, 0.013765407, -0.039089683, 0.25870228, -0.08919069), target1);
	target1 = MulAdd(nc1, MF4x4(-0.14971368, 0.06935879, -0.089983195, 0.01406992, 0.16989979, -0.037809014, 0.07157283, -0.050660506, -0.032826405, 0.033794664, -0.0051332368, 0.089349195, 0.06263488, -0.07048108, 0.07263597, -0.11618368), target1);
	target1 = MulAdd(nd1, MF4x4(0.013391823, -0.07888697, -0.13984044, -0.01241464, -0.06475807, 0.06978077, -0.20329754, 0.16602662, 0.013664227, 0.12317301, -0.10240692, -0.0657491, -0.31402445, -0.14472555, 0.1739024, 0.0005437834), target1);
	target1 = MulAdd(ne1, MF4x4(0.16330495, 0.02644609, 0.23837087, -0.07734767, 0.12377497, -0.18478604, 0.35040903, -0.05262452, 0.049074646, -0.0077528385, 0.15370984, -0.22888668, 0.3603141, 0.29372314, -0.4432887, 0.20702155), target1);
	target1 = MulAdd(nf1, MF4x4(-0.18785694, 0.21085343, -0.111042105, 0.0478716, -0.08214944, -0.0922987, 0.29570273, 0.025100114, 0.25403878, 0.01271447, 0.21851794, -0.1434596, -0.21153769, 0.023305666, -0.10386609, 0.043919638), target1);
	target1 = MulAdd(ng1, MF4x4(-0.117247805, 0.013329102, 0.0313911, -0.08055777, -0.0053445757, -0.2886372, 0.07938673, -0.06659165, 0.20798062, 0.030106818, -0.04811631, 0.036332276, -0.057687126, 0.03813657, 0.035860628, -0.11273985), target1);
	target1 = MulAdd(nh1, MF4x4(-0.0031557097, 0.027456097, -0.14444692, 0.08411739, 0.13466308, -0.13212901, -0.0034804344, 0.1464661, -0.21033211, 0.05913627, 0.10233881, 0.009844489, -0.15369488, -0.018978333, -0.07518442, -0.010549853), target1);
	target1 = MulAdd(ni1, MF4x4(0.112989105, -0.011166866, -0.08277204, 0.046827227, -0.08067428, 0.13465053, -0.1656419, 0.07280515, 0.037523627, -0.050147127, -0.17731906, 0.1067486, 0.119732924, -0.102017604, 0.31421226, -0.14060387), target1);
	target1 = MulAdd(na2, MF4x4(-0.1106223, 0.09229271, -0.09355422, -0.02413533, -0.096457504, -0.13282233, 0.022983741, -0.13534859, -0.0056585902, -0.07214356, 0.14617127, -0.13723095, 0.058078192, -0.1038417, -0.10452195, -0.18855028), target1);
	target1 = MulAdd(nb2, MF4x4(0.16357008, 0.080841675, 0.1663936, 0.20815827, 0.03813903, 0.34158087, -0.012987109, 0.39152008, -0.027927356, -0.14332302, -0.012866622, -0.016149148, -0.08733816, 0.1960951, 0.19572765, -0.2710826), target1);
	target1 = MulAdd(nc2, MF4x4(0.024827998, 0.24175219, 0.030659903, -0.22227505, 0.026898654, 0.009930298, 0.088392995, 0.32644793, -0.10351868, -0.08717382, 0.22931585, 0.05197704, 0.06534648, 0.13636068, 0.062107667, 0.024806283), target1);
	target1 = MulAdd(nd2, MF4x4(-0.18550465, 0.062058095, -0.08620093, 0.20158216, -0.1460996, 0.14275469, -0.28057688, -0.11685651, -0.09627509, 0.09029933, 0.03669734, 0.1257313, -0.07974307, 0.020742215, -0.0039170664, 0.11340528), target1);
	target1 = MulAdd(ne2, MF4x4(0.15225565, 0.171972, 0.13573253, 0.0056740018, -0.1667786, 0.06028638, -0.1255049, -0.23327217, -0.139949, 0.029957669, -0.16713464, 0.046236664, -0.05070503, 0.18714412, -0.20076098, 0.1672637), target1);
	target1 = MulAdd(nf2, MF4x4(0.18468563, 0.07733334, 0.14463845, -0.10712052, 0.36213547, 0.29404843, 0.2110929, 0.14646721, -0.059985258, -0.2709805, 0.073061034, -0.039072156, 0.015898943, -0.17166951, 0.20194982, -0.04723745), target1);
	target1 = MulAdd(ng2, MF4x4(-0.26353067, 0.050225407, -0.42643914, 0.06601958, -0.10513071, -0.1654714, 0.0593609, 0.027410276, -0.19465327, -0.13865606, 0.05579213, 0.07982532, -0.20893136, -0.008150932, 0.053529713, -0.0317475), target1);
	target1 = MulAdd(nh2, MF4x4(-0.012075693, -0.27574313, 0.22184552, -0.117393926, -0.49310133, -0.13997443, -0.079180904, -0.053438634, -0.07552426, -0.045796394, -0.037434675, 0.24076645, -0.04395852, 0.10325762, -0.19867313, -0.070216134), target1);
	target1 = MulAdd(ni2, MF4x4(-0.026107877, -0.030023552, -0.047810435, 0.20572239, 0.061861858, 0.1776161, -0.306099, 0.16332485, -0.1843373, 0.06758581, -0.23902373, -0.10575018, 0.03990962, -0.046113137, 0.14876197, -0.21280771), target1);
	
	MF4 target2 = { 0.07789114, 0.0024746545, 0.1891165, -0.0023716448 };
	target2 = MulAdd(a1, MF4x4(-0.14542116, -0.15827142, -0.20811677, -0.103433, 0.19787271, 0.33990738, 0.17085013, -0.059132278, 0.013047369, -0.1687924, 0.06732661, -0.050968684, 0.09197164, -0.041265316, -0.108277336, -0.014430892), target2);
	target2 = MulAdd(b1, MF4x4(-0.022837132, 0.20440012, -0.14266612, 0.019944299, 0.069084294, 0.3171199, -0.1521742, -0.35806596, 0.13581008, -0.13811131, 0.12219503, 0.17329764, -0.15100783, 0.0862648, 0.118227705, 0.18736814), target2);
	target2 = MulAdd(c1, MF4x4(0.013604392, 0.11496102, -0.18734755, -0.047555517, 0.05297245, 0.006461213, 0.06247472, -0.0202791, 0.02329791, 0.11530998, -0.148774, 0.0965498, 0.1487269, 0.061629567, -0.22488646, -0.005393787), target2);
	target2 = MulAdd(d1, MF4x4(-0.29286116, 0.11958281, -0.11193505, -0.17139061, -0.035151243, -0.2635945, 0.0002499315, -0.16346519, 0.23779829, 0.04454211, 0.21293561, 0.25617847, 0.12194803, -0.0017443774, -0.009216221, -0.034387548), target2);
	target2 = MulAdd(e1, MF4x4(0.28791443, -0.25421545, -0.058626153, -0.1520494, -0.16808414, -0.39723453, -0.13199537, 0.056999452, -0.048155293, 0.38699663, -0.114719056, 0.001293743, -0.0959443, -0.08189709, 0.26921842, 0.061219636), target2);
	target2 = MulAdd(f1, MF4x4(0.00781977, -0.07103863, -0.21942843, 0.2419546, 0.20016691, -0.28697264, -0.034715973, -0.03381459, -0.028126812, 0.046806023, -0.14423183, -0.13472253, 0.009225362, -0.086190686, 0.0041205613, 0.08953202), target2);
	target2 = MulAdd(g1, MF4x4(-0.04926224, -0.099740155, -0.088695474, 0.09950333, -0.06495916, 0.20126842, -0.0062843356, -0.034764495, -0.10808971, -0.19946553, 0.075991094, 0.14746219, 0.08247818, 0.07382381, -0.056908615, -0.026823666), target2);
	target2 = MulAdd(h1, MF4x4(-0.04837408, 0.12605472, -0.23957102, -0.14252385, -0.046534102, -0.07511751, -0.21040416, 0.2064639, -0.006026243, -0.25005546, -0.063780144, 0.076840036, -0.07484346, 0.017368162, 0.04657373, -0.022188455), target2);
	target2 = MulAdd(i1, MF4x4(0.04545079, -0.002226373, -0.11695467, 0.12954631, 0.054903183, 0.15162702, -0.19222596, 0.05351421, -0.079599276, -0.036238387, 0.1362261, 0.037431743, -0.0015106505, 0.18739921, 0.122365154, -0.05871144), target2);
	target2 = MulAdd(a2, MF4x4(-0.005558987, -0.13553315, -0.006372213, 0.06633917, -0.22141413, -0.15780807, 0.057122614, -0.057320844, -0.06306763, 0.19112623, -0.041758966, 0.03555483, -0.005718873, 0.009167371, 0.050909385, -0.14599234), target2);
	target2 = MulAdd(b2, MF4x4(0.18175003, 0.10442485, 0.052994236, -0.4001252, -0.08328538, 0.06380226, -0.055015627, 0.010929493, -0.22888647, -0.033181675, -0.07570874, 0.07933599, -0.07894686, 0.12202901, 0.13679314, -0.054344065), target2);
	target2 = MulAdd(c2, MF4x4(0.030145945, -0.06121175, -0.08550973, 0.10082535, 0.07198805, 0.21414264, -0.25636044, 0.028803539, 0.043738026, -0.0367658, 0.27998537, -0.06274612, -0.22862338, 0.002624325, 0.28519824, 0.18540645), target2);
	target2 = MulAdd(d2, MF4x4(-0.012136538, -0.07059324, 0.018098673, 0.12078888, -0.087637, 0.041642863, 0.034997553, -0.16741107, 0.04701011, -0.004160269, 0.122639626, 0.0043271836, 0.011551197, -0.16421974, -0.102481335, 0.014233497), target2);
	target2 = MulAdd(e2, MF4x4(-0.37945676, 0.25232047, -0.03707734, -0.1985225, -0.11536396, 0.22039749, -0.21809638, -0.10596801, -0.17211124, -0.2035486, 0.011822896, 0.27510995, -0.105182275, 0.022503568, -0.0063389307, -0.071560584), target2);
	target2 = MulAdd(f2, MF4x4(-0.16101715, -0.034247126, 0.16626042, 0.031131435, 0.03048031, -0.105447404, -0.05728527, -0.14518815, -0.019103229, -0.15152888, -0.119154684, 0.028724093, 0.05836196, -0.35943082, -0.016481897, -0.0437348), target2);
	target2 = MulAdd(g2, MF4x4(-0.07719413, -0.33214888, -0.0541927, 0.16506542, -0.032792456, 0.016834807, 0.1724155, 0.073768586, 0.002303886, -0.001382793, -0.0562648, -0.10167158, -0.19101655, 0.052783452, -0.1422853, 0.09653729), target2);
	target2 = MulAdd(h2, MF4x4(-0.30030164, 0.11637444, -0.23238538, -0.27238008, -0.077208534, -0.027645003, 0.10369907, 0.20162316, -0.14428844, 0.1766293, 0.024419712, 0.11301171, 0.07772854, 0.18613201, 0.20721672, -0.1751799), target2);
	target2 = MulAdd(i2, MF4x4(-0.1026615, -0.12484944, 0.15386428, 0.038676128, -0.119472496, -0.032417197, -0.14208497, -0.05254358, -0.0035079278, -0.011276316, 0.043117497, -0.010022288, 0.031624593, 0.014969992, -0.031410277, 0.15284787), target2);
	target2 = MulAdd(na1, MF4x4(0.018149922, -0.05906194, 0.054767277, 0.008161979, -0.076949194, 0.040888708, -0.006419542, -0.12897012, -0.0028229658, 0.20937827, 0.02741711, -0.04013348, -0.12731804, 0.008064522, 0.002870103, 0.027690327), target2);
	target2 = MulAdd(nb1, MF4x4(0.023197446, -0.08888926, 0.15531142, 0.13745947, 0.054352283, -0.121785395, 0.16237587, 0.023567237, -0.36160588, 0.30499592, -0.033180915, -0.1515843, 0.04251452, -0.17903805, 0.03235283, -0.08062386), target2);
	target2 = MulAdd(nc1, MF4x4(-0.0072868476, -0.2010616, 0.13061914, 0.12846659, 0.11725315, 0.14589547, -0.05373261, -0.081606135, -0.07010131, -0.025378224, 0.10265872, 0.18658938, -0.12165338, 0.036297683, 0.03925332, 0.16576236), target2);
	target2 = MulAdd(nd1, MF4x4(0.10300252, -0.11548347, -0.08691649, -0.014866044, -0.3213804, 0.47206497, -0.16032113, 0.026284516, 0.046302956, -0.052474245, -0.025335522, -0.10957576, -0.16872157, 0.19049212, -0.023881195, 0.061396897), target2);
	target2 = MulAdd(ne1, MF4x4(-0.16202278, 0.52128345, -0.2601511, 0.06116799, -0.21123995, 0.39389637, -0.350544, -0.16157438, -0.02823116, -0.39056876, -0.14267299, 0.03262984, 0.342303, -0.20556125, -0.0019219286, -0.1824844), target2);
	target2 = MulAdd(nf1, MF4x4(0.23399737, -0.0912646, 0.11152403, -0.20945886, -0.053451832, -0.09786892, -0.059099484, 0.18103573, -0.117154315, -0.18342866, 0.12650815, 0.0067340015, -0.037984423, 0.17667364, 0.071636364, -0.011689163), target2);
	target2 = MulAdd(ng1, MF4x4(-0.099510275, -0.0925438, -0.009136904, -0.03774997, -0.13348748, 0.3605135, -0.078298144, -0.14712195, 0.22566219, 0.18659295, 0.05614545, 0.10792911, -0.12477693, -0.03587624, 0.08050775, -0.054740936), target2);
	target2 = MulAdd(nh1, MF4x4(0.10312337, -0.063681684, 0.16496794, 0.09038492, -0.08903926, 0.41163155, -0.013669214, -0.21472235, -0.054991595, 0.0033639956, 0.18160143, 0.17240305, -0.039428882, 0.17087695, -0.1729076, 0.09871825), target2);
	target2 = MulAdd(ni1, MF4x4(-0.13123736, 0.0802573, 0.077981554, -0.101768315, 0.089998, -0.13781744, 0.122858986, 0.054121554, -0.02640825, 0.13577555, -0.037485655, -0.04179625, 0.000106130996, -0.100183845, 0.00046665114, 0.21791616), target2);
	target2 = MulAdd(na2, MF4x4(0.011894387, -0.030088445, 0.025817253, 0.08193235, 0.109322436, 0.10855583, -0.19661167, -0.09405307, 0.2073779, -0.33972177, 0.048635002, -0.14883177, 0.056954246, 0.3953476, 0.18765114, -0.014010224), target2);
	target2 = MulAdd(nb2, MF4x4(-0.22594279, -0.014942035, -0.1519647, 0.25367293, 0.16330296, 0.03317176, -0.32148597, -0.46503916, 0.19944623, -0.26229686, 0.019909514, -0.059794176, 0.12912126, 0.044948537, -0.08649492, 0.08024645), target2);
	target2 = MulAdd(nc2, MF4x4(-0.022943841, -0.068013534, 0.11032515, 0.011685601, 0.020096298, -0.3285243, 0.08196111, -0.089537136, -0.03976742, -0.1315977, -0.36306036, 0.24678081, 0.22115967, -0.017472323, -0.19451386, -0.035218123), target2);
	target2 = MulAdd(nd2, MF4x4(-0.020891193, -0.12721714, -0.15030408, 0.026523203, -0.12413139, -0.11235275, -0.21476477, -0.11326953, 0.028815055, -0.18552732, -0.0076828003, -0.14679903, 0.020509586, -0.18695217, 0.06696879, 0.103938386), target2);
	target2 = MulAdd(ne2, MF4x4(0.057521313, 0.28509304, -0.2525733, 0.16745082, -0.26614547, 0.18545172, -0.27140215, 0.018639714, 0.19730581, 0.1659491, -0.058363054, -0.4048628, 0.024913948, -0.44124457, 0.13872208, -0.0371103), target2);
	target2 = MulAdd(nf2, MF4x4(0.100904405, 0.06700356, -0.035322092, 0.21781014, 0.018047005, -0.21737386, -0.3734802, 0.13506944, 0.012760691, 0.06620756, -0.0253398, 0.0030280363, -0.044015452, -0.055860534, -0.3547194, -0.04230283), target2);
	target2 = MulAdd(ng2, MF4x4(-0.19012743, -0.34408915, 0.18940191, 0.13152952, 0.107553795, -0.00694412, -0.07930157, -0.30964044, 0.034710668, -0.031806916, 0.019838978, 0.017044948, 0.110688254, -0.0029772928, 0.09414367, -0.10760175), target2);
	target2 = MulAdd(nh2, MF4x4(-0.05745392, 0.29022983, 0.014998233, 0.27365527, 0.08169933, 0.0734232, -0.09404464, -0.26870936, 0.21171738, -0.19529793, -0.064401075, -0.18972695, -0.08024953, -0.027122354, -0.11661348, 0.010131282), target2);
	target2 = MulAdd(ni2, MF4x4(0.07599435, -0.06851123, 0.06258365, 0.10296892, 0.15556085, -0.041609086, -0.11303363, 0.07082365, 0.013949174, -0.087201476, -0.0855705, -0.12979257, 0.04048528, 0.4211556, 0.04118289, -0.22093314), target2);

	MF3 target3 = tex6.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.050153337, 0.012563414, 0.014994658, 0.10498867, 0.07151875, 0.06761489, 0.061650798, -0.035183728, -0.050987806, 0.0017240314, 0.041055307, 0.020366805), target3);
	target3 = MulAdd(e2, MF4x3(0.110105395, -0.044468552, -0.072567016, -0.049364448, -0.015713394, -0.021540897, -0.01636263, -0.084110685, -0.08281401, -0.08940374, 0.047863875, 0.051104594), target3);
	target3 = MulAdd(ne1, MF4x3(-0.081597924, 0.002422661, 0.01143175, -0.07504751, -0.09938017, -0.1063178, -0.10390281, 0.0262197, 0.060155805, -0.24289346, -0.0054961476, 0.045964316), target3);
	target3 = MulAdd(ne2, MF4x3(-0.1829316, 0.047622137, 0.07963877, 0.048703995, -0.0026299425, -0.003712008, 0.029338706, 0.096882835, 0.102083966, 0.078538164, -0.07247937, -0.06820231), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}

//!PASS 6
//!DESC Conv-4x3x3x16
//!IN tex1, tex2, tex5
//!OUT tex3, tex4, tex6
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass6(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.05988374, -0.23198523, -0.058251306, -0.038808554 };
	target1 = MulAdd(a1, MF4x4(0.10883355, -0.14958352, 0.026701333, 0.090302855, 0.033934478, 0.120340124, 0.027125617, -0.16792692, -0.075757094, 0.28692973, 0.013230067, -0.040618937, 0.087148145, -0.05985753, -0.06352023, -0.05775848), target1);
	target1 = MulAdd(b1, MF4x4(-0.18206549, -0.10363482, 0.097648725, -0.08801144, 0.31633568, 0.058347676, -0.009121898, 0.02594872, 0.14757825, 0.4730546, -0.008518203, -0.3090668, -0.004052835, -0.14166127, -0.010156037, 0.21191326), target1);
	target1 = MulAdd(c1, MF4x4(0.05735183, 0.039180398, -0.12357178, 0.04830351, 0.120369986, -0.052775342, 0.005902798, 0.07695394, 0.00602021, 0.16758691, 0.10287989, -0.1718468, -0.1319741, 0.16932078, -0.2055026, -0.31820264), target1);
	target1 = MulAdd(d1, MF4x4(0.05427556, -0.28392607, 0.08579091, -0.0015861926, 0.062348455, -0.27778792, -0.07450379, 0.01616914, -0.012357131, -0.056992117, -0.1896176, 0.018156245, 0.06499259, -0.076558664, 0.10341699, -0.08993959), target1);
	target1 = MulAdd(e1, MF4x4(-0.05741742, -0.05414434, 0.18006511, 0.09840777, -0.11849741, 0.40419933, 0.21349974, 0.40268886, 0.23218039, -0.0680356, -0.3130592, -0.21271054, 0.13776754, 0.19114101, 0.17373541, 0.43457666), target1);
	target1 = MulAdd(f1, MF4x4(-0.060757063, 0.11339545, -0.042958036, -0.06483378, -0.06681766, -0.056395415, 0.037868995, 0.033861663, -0.1041215, 0.0046828864, 0.14360638, 0.087886184, -0.26808187, 0.19876598, -0.05276215, -0.07073776), target1);
	target1 = MulAdd(g1, MF4x4(-0.24029991, -0.14217372, -0.011767948, 0.011623913, 0.33820602, -0.24501325, -0.11444902, 0.14536968, 0.16780593, 0.0065867775, -0.074971735, 0.021472024, -0.10853042, 0.09527126, 0.009436061, -0.09688826), target1);
	target1 = MulAdd(h1, MF4x4(-0.31893802, -0.0016892607, -0.105592966, -0.116694786, -0.007851739, 0.1429722, 0.0741952, 0.050125953, 0.07185179, 0.1900389, 0.030889044, 0.15422693, 0.12550323, 0.3556344, 0.108276874, -0.099125646), target1);
	target1 = MulAdd(i1, MF4x4(-0.33620578, -0.11113713, -0.15881014, 0.028243937, -0.12028756, -0.028566968, -0.002682634, -0.15635195, -0.06869284, -0.03309234, 0.03086361, 0.050773233, -0.08939835, 0.15237434, -0.024076303, -0.13092752), target1);
	target1 = MulAdd(a2, MF4x4(-0.31200737, 0.32207087, -0.068700634, -0.39202076, 0.0676771, 0.083766654, -0.05696634, 0.03088338, 0.046761762, 0.09732023, 0.030844063, -0.03369749, -0.12664944, -0.029924957, 0.10551989, 0.086157694), target1);
	target1 = MulAdd(b2, MF4x4(-0.1919761, 0.17179352, -0.025805056, -0.05570367, -0.16736336, 0.07430868, -0.13228212, 0.10702857, -0.09723214, 0.1884809, 0.09422538, -0.16902041, -0.1964137, 0.17877853, 0.17453954, -0.11339361), target1);
	target1 = MulAdd(c2, MF4x4(0.11865004, 0.013131073, 0.17317963, -0.2077911, -0.1116894, 0.09672745, -0.023348883, -0.1176519, 0.15893579, 0.22941695, 0.18798698, 0.059098385, 0.09498779, 0.10118143, 0.08737761, -0.016268898), target1);
	target1 = MulAdd(d2, MF4x4(-0.025380889, 0.17163627, -0.014800655, 0.12669696, 0.050048903, -0.06513837, 0.020915661, 0.2144372, -0.17799327, 0.0068409992, 0.06751171, -0.16618991, 0.14637277, 0.010591964, -0.15909241, 0.02660789), target1);
	target1 = MulAdd(e2, MF4x4(0.3178319, 0.15036377, -0.03386948, 0.13883169, -0.33842105, 0.061425313, -0.04195804, 0.22558802, 0.2250625, 0.060225345, -0.08467863, 0.0014776831, 0.080328, 0.03221249, 0.20838667, 0.11489719), target1);
	target1 = MulAdd(f2, MF4x4(-0.0013924981, 0.28233197, -0.17997956, -0.10959627, -0.16253087, 0.016549526, -0.1571556, 0.017017027, -0.14697123, 0.0869202, 0.2104898, -0.15658243, 0.13424201, -0.022636503, -0.09512045, 0.0927298), target1);
	target1 = MulAdd(g2, MF4x4(-0.038486905, -0.19215351, -0.2446516, -0.02958912, 0.06899297, 0.028667469, -0.05537665, 0.066711955, -0.0017354499, -0.07466053, 0.028587297, -0.042017035, 0.023596823, 0.0067433366, -0.14685915, 0.13400853), target1);
	target1 = MulAdd(h2, MF4x4(0.0573442, 0.1424536, 0.19606829, 0.07141616, -0.032276712, 0.20030099, 0.16644277, 0.10393295, 0.27240822, 0.0071844175, -0.023368603, -0.14067268, -0.20310283, 0.039528254, 0.103837095, 0.08236034), target1);
	target1 = MulAdd(i2, MF4x4(0.15616669, 0.3495403, -0.05678421, -0.069600284, -0.07361787, 0.079501756, 0.009530261, -0.032385882, 0.029831208, -0.095407076, 0.010261287, 0.15250465, -0.04868275, 0.058579214, 0.03779718, -0.10810775), target1);
	target1 = MulAdd(na1, MF4x4(0.06492073, 0.018667994, -0.004712761, -0.032692235, 0.04027288, -0.114499666, -0.04327484, 0.13778907, -0.09373396, -0.08822919, 0.04796151, -0.057756703, -0.26161298, 0.07182931, 0.12998815, -0.14389744), target1);
	target1 = MulAdd(nb1, MF4x4(0.19001032, 0.13091461, -0.2551175, 0.013365716, -0.031779066, 0.002531366, -0.13807543, -0.14165778, -0.2701911, -0.0890182, 0.34704998, -0.008494185, 0.16179956, -0.060182545, 0.060827415, -0.17249492), target1);
	target1 = MulAdd(nc1, MF4x4(0.10665868, 0.15999752, -0.042796712, -0.14010513, -0.014244899, 0.017433831, 0.053657144, -0.0965679, 0.23623326, 0.0690172, 0.1290121, -0.025523739, 0.122357905, -0.18172716, 0.02829383, 0.10042929), target1);
	target1 = MulAdd(nd1, MF4x4(-0.09273112, 0.09466892, -0.009225705, 0.16772579, 0.0813042, -0.16461512, 0.038097944, 0.19834967, -0.033650465, -0.12888893, 0.1414859, -0.021587005, -0.0047441716, 0.08880282, 0.020621201, 0.065779164), target1);
	target1 = MulAdd(ne1, MF4x4(0.0051817205, 0.20322648, -0.077459775, 0.07461627, 0.1817634, -0.5371515, -0.29336745, -0.57652086, 0.035826538, 0.41058993, 0.21512514, -0.041881148, -0.2490056, -0.07172767, 0.20821427, -0.69866294), target1);
	target1 = MulAdd(nf1, MF4x4(0.18961228, 0.027452804, -0.0075194626, -0.029665018, 0.28770384, -0.099777386, -0.12160496, 0.07690297, 0.30273837, 0.026466522, 0.18100439, -0.09078488, 0.2035407, -0.062081084, 0.06744994, -0.07512911), target1);
	target1 = MulAdd(ng1, MF4x4(0.008473044, 0.07501521, -0.11242355, -0.039451122, -0.21818535, -0.07779562, 0.13194147, 0.084983595, 0.0770609, -0.034488454, 0.08823556, -0.07168295, 0.041894365, 0.0789253, 0.06191209, 0.013991105), target1);
	target1 = MulAdd(nh1, MF4x4(0.10582237, 0.1514222, 0.10751824, 0.08231926, 0.23913008, -0.2673503, 0.036170945, 0.31463087, 0.026397424, -0.26629624, -0.07428361, -0.077513516, 0.0768238, -0.026638538, 0.12589583, -0.11521212), target1);
	target1 = MulAdd(ni1, MF4x4(0.30389515, 0.18963532, 0.023015842, -0.10240883, 0.045651495, -0.036785256, -0.13346411, 0.16431254, -0.030950911, -0.03381929, 0.09413111, 0.03924852, 0.11044091, -0.10149653, 0.14114548, 0.07801978), target1);
	target1 = MulAdd(na2, MF4x4(0.029622428, 0.14528686, -0.034057826, 0.010664312, 0.059213262, -0.29354423, -0.08448559, 0.10569036, -0.02988314, -0.016480735, 0.042203777, -0.028342744, 0.36807576, 0.09301971, 0.123721026, 0.07806503), target1);
	target1 = MulAdd(nb2, MF4x4(0.04849538, -0.09201287, 0.10069803, -0.031749677, 0.18774022, -0.27789372, 0.05288653, 0.08097265, 0.006918896, -0.060978457, -0.113319606, 0.008844536, 0.021804892, -0.0011744015, -0.35720357, -0.24996938), target1);
	target1 = MulAdd(nc2, MF4x4(-0.07147501, -0.09339197, 0.16154395, 0.3372506, -0.0004858638, -0.056553435, -0.12463908, -0.0047342298, -0.009141984, -0.13796125, -0.14035304, -0.104403175, -0.07054226, 0.12142519, -0.24971877, -0.1914648), target1);
	target1 = MulAdd(nd2, MF4x4(-0.008194284, -0.027617034, 0.004994261, -0.07672895, 0.25697777, -0.18313397, 0.03266311, -0.029157834, 0.010476624, 0.12394092, -0.059660904, 0.08561672, -0.0008583816, -0.044442356, 0.28336492, 0.065344445), target1);
	target1 = MulAdd(ne2, MF4x4(-0.3570137, -0.06802815, -0.10298613, -0.21256869, 0.3025278, -0.263425, 0.13547331, 0.038517762, 0.14951234, -0.16869017, 0.03293678, 0.21897063, -0.14688788, 0.21619378, -0.27550143, 0.048003722), target1);
	target1 = MulAdd(nf2, MF4x4(0.15607022, -0.111073844, 0.2733694, 0.05423378, 0.25116092, -0.17350473, 0.13460433, 0.09602139, 0.17372625, -0.0024815476, -0.30154657, 0.0062206364, -0.0051755225, 0.04985103, -0.06310478, -0.30450678), target1);
	target1 = MulAdd(ng2, MF4x4(0.057571005, -0.019051064, 0.054884393, 0.03993782, 2.6782007e-05, -0.05726912, 0.067192145, -0.08955987, -0.11937056, 0.15837386, -0.011670469, -0.06299701, -0.014917928, 0.23921679, 0.0054613873, -0.23099245), target1);
	target1 = MulAdd(nh2, MF4x4(-0.035849575, -0.06785954, -0.15053692, 0.011964653, 0.1975448, -0.1633047, -0.024539666, 0.03170174, -0.12585635, -0.021171011, 0.15862562, 0.10296358, 0.3114039, 0.10010659, -0.09519227, -0.12945092), target1);
	target1 = MulAdd(ni2, MF4x4(0.044433746, -0.058466546, -0.13258536, -0.033972915, 0.0037206819, -0.057343487, 0.13798106, 0.044445634, -0.22623023, 0.2408462, 0.048287082, -0.30717465, -0.13402344, 0.20024839, -0.026932377, -0.034217034), target1);
	
	MF4 target2 = { 0.012253057, 0.13434875, -0.10318777, -0.074252345 };
	target2 = MulAdd(a1, MF4x4(0.045249436, -0.040327657, -0.2667367, 0.0913868, 0.14961123, 0.07253207, 0.29162952, -0.11320944, 0.017569833, 0.012350104, 0.22532712, 0.025312115, -0.12193993, 0.037391737, 0.03220835, 0.12102545), target2);
	target2 = MulAdd(b1, MF4x4(-0.020587588, -0.07043244, -0.28093454, 0.18336722, 0.08153308, -0.05914772, -0.15255487, 0.079236075, -0.4269835, -0.11470208, -0.19043571, 0.2723162, 0.0066251885, -0.17115718, 0.022036036, 0.07349558), target2);
	target2 = MulAdd(c1, MF4x4(-0.09441315, 0.042170826, 0.071251415, -0.13891962, 0.10236482, 0.05356262, 0.0291025, 0.063867815, -0.14530063, -0.08727925, -0.0048300857, 0.06766869, -0.3481536, -0.10943503, 0.014951926, 0.11993114), target2);
	target2 = MulAdd(d1, MF4x4(0.13420522, 0.095721036, -0.1756104, -0.09906728, 0.09808904, -0.27402034, -0.102161326, 0.40162942, 0.13465238, 0.20237032, 0.3192343, -0.061512157, -0.20711629, -0.09659007, 0.06838548, 0.30256763), target2);
	target2 = MulAdd(e1, MF4x4(0.025805298, -0.0322599, 0.23653145, -0.2760735, 0.11291006, -0.10836205, 0.20742846, 0.06974535, -0.4191803, -0.10882523, 0.038603242, 0.22662747, -0.08845715, -0.26151156, -0.16670766, 0.008536192), target2);
	target2 = MulAdd(f1, MF4x4(-0.085842185, -0.21239999, -0.032774646, 0.088163696, 0.038300447, -0.09510875, 0.10113864, -0.14712982, 0.14264707, -0.10895432, 0.03051617, -0.06791873, -0.35589013, -0.12884575, -0.09460007, -0.0879575), target2);
	target2 = MulAdd(g1, MF4x4(0.19235751, -0.109611385, -0.037397474, -0.26632717, 0.07878826, 0.19749992, 0.0035685285, 0.11793927, 0.019899402, 0.085741036, 0.08433813, -0.018344546, -0.0901484, 0.08221562, 0.12735383, 0.12801875), target2);
	target2 = MulAdd(h1, MF4x4(0.19123435, 0.007882246, -0.018564796, -0.09904253, 0.28052533, 0.6360808, 0.25001726, -0.30590564, 0.07646281, -0.34298185, -0.33293694, -0.036753535, 0.18719083, 0.22131144, -0.1420962, -0.0014709529), target2);
	target2 = MulAdd(i1, MF4x4(0.23060241, -0.14145076, -0.113213465, 0.037221998, 0.22163334, 0.18520229, 0.2961799, -0.063605964, 0.022606356, 0.043340076, -0.3233993, -0.075055614, -0.0038865958, 0.19558622, -0.018503085, -0.22932632), target2);
	target2 = MulAdd(a2, MF4x4(0.11712158, -0.03590364, 0.38039652, -0.019910801, 0.13338004, -0.07078425, 0.09404417, -0.27607328, -0.02205519, -0.013522961, 0.2924021, -0.16088538, -0.034280356, -0.063614614, -0.061583273, -0.22479968), target2);
	target2 = MulAdd(b2, MF4x4(-0.05624079, 0.32659104, 0.47335497, -0.14091404, 0.14739423, -0.07122778, -0.009384643, -0.058900848, 0.06260307, -0.17574102, 0.3538743, 0.2842822, -0.18150197, 0.26806462, 0.24673693, 0.19710627), target2);
	target2 = MulAdd(c2, MF4x4(-0.24837571, -0.01663848, -0.13093965, 0.30109972, -0.09680959, 0.074526474, 0.024111765, -0.012781737, -0.08591349, -0.100348584, 0.02363011, -0.02687084, -0.27630556, 0.14074354, -0.016993485, 0.084373675), target2);
	target2 = MulAdd(d2, MF4x4(0.1543391, -0.2008408, -0.21885285, 0.2320177, 0.06669948, -0.05171086, -0.25833863, -0.14085051, -0.035878573, -0.1632403, 0.09782713, 0.22973235, -0.14022017, -0.018347954, -0.29652777, 0.10912002), target2);
	target2 = MulAdd(e2, MF4x4(-0.050962634, -0.040519282, -0.04381614, 0.084133334, 0.21222316, -0.091010064, 0.13157965, -0.21375372, -0.021148674, -0.044127557, -0.11400533, 0.097688414, 0.31571037, -0.05167655, 0.27606225, 0.12169133), target2);
	target2 = MulAdd(f2, MF4x4(-0.1329087, 0.14291021, 0.043337896, -0.25970098, -0.11379552, -0.040157612, 0.08379851, -0.24104865, 0.1593102, -0.031879216, -0.004603848, -0.019003935, -0.24769545, -0.17577063, 0.16019398, 0.04640235), target2);
	target2 = MulAdd(g2, MF4x4(-0.11615644, 0.12189521, 0.12919527, -0.104224406, -0.10143574, 0.14024515, -0.02759362, -0.1467619, 0.09028311, -0.06510291, 0.061612967, 0.10227729, -0.08785846, 0.06464871, -0.05048917, 0.09055746), target2);
	target2 = MulAdd(h2, MF4x4(0.34443164, 0.013906371, -0.0595573, 0.09354196, 0.12184454, -0.02698316, -0.06208632, -0.11266858, 0.004904335, -0.33987018, -0.2494041, 0.127125, 0.040493876, 0.0280356, -0.037431944, 0.05823802), target2);
	target2 = MulAdd(i2, MF4x4(-0.1762869, -0.20683959, -0.37788594, -0.1244979, -0.17202286, -0.038234763, 0.015924744, -0.014006752, 0.07097758, -0.25219876, -0.3164728, 0.022413896, -0.41423917, -0.03191542, 0.009464804, 0.0770316), target2);
	target2 = MulAdd(na1, MF4x4(0.12442388, 0.031095076, 0.18799834, -0.18449762, -0.11995044, 0.11634828, -0.0055850362, 0.08558657, -0.025694892, -0.2854381, -0.32876188, 0.14690274, -0.1835963, -0.1786755, -0.44678628, 0.1678422), target2);
	target2 = MulAdd(nb1, MF4x4(0.031241562, -0.1265462, 0.081369035, -0.1184643, 0.0010021052, -0.10810683, -0.039572187, 0.13850863, -0.010703417, -0.057981443, 0.30309856, 0.13869847, -0.16935349, 0.16969836, 0.045642667, 0.26460654), target2);
	target2 = MulAdd(nc1, MF4x4(0.28779998, 0.04767888, -0.011856489, 0.114210494, 0.034624737, 0.19084676, -0.02740287, 0.035041407, -0.049002927, 0.10928203, 0.17362499, -0.1280889, 0.00077811617, -0.17594084, -0.18379052, 0.22303762), target2);
	target2 = MulAdd(nd1, MF4x4(0.0008487252, -0.060438234, 0.109334275, -0.18768874, 0.13844973, 0.09226474, 0.18361697, -0.19385563, -0.29241335, -0.1033556, -0.3289991, 0.10027422, -0.09454755, -0.22817631, -0.2964217, -0.19499257), target2);
	target2 = MulAdd(ne1, MF4x4(-0.057920385, 0.06342629, -0.048577324, 0.15952215, -0.061343953, 0.16471362, 0.1501856, 0.027373426, 0.01837245, -0.0732048, 0.09776471, 0.14817989, -0.112215854, 0.109101914, 0.058316242, 0.29969788), target2);
	target2 = MulAdd(nf1, MF4x4(-0.12411656, -0.033170763, -0.08715826, 0.110862456, 0.1871076, 0.14550175, 0.23373431, 0.19281025, -0.37016305, -0.11924462, 0.026793748, 0.092801645, 0.04318573, 0.20969667, -0.39267823, 0.1938874), target2);
	target2 = MulAdd(ng1, MF4x4(-0.15932916, 0.22217506, 0.007901788, -0.04037383, 0.09095982, -0.043115042, 0.098845564, -0.073432215, -0.14535685, 0.11504512, -0.07950504, -0.010718905, -0.050012022, -0.13089752, -0.3323894, -0.005423676), target2);
	target2 = MulAdd(nh1, MF4x4(0.007320675, 0.21108273, 0.20758918, -0.04005568, -0.13234317, -0.15708306, 0.41804615, -0.09720499, -0.09623786, 0.2441289, 0.33276868, 0.17716111, -0.45670444, -0.026252905, -0.01958701, 0.24028622), target2);
	target2 = MulAdd(ni1, MF4x4(-0.14936383, -0.023504466, -0.028479185, -0.053541556, -0.060263615, -0.087681144, 0.2435555, 0.08470686, -0.17713271, -0.2303349, 0.09337386, 0.039068084, -0.16263027, 0.034289114, 0.16604292, 0.10550447), target2);
	target2 = MulAdd(na2, MF4x4(-0.16556105, 0.12211341, -0.0036831333, 0.13802956, 0.065256506, 0.03395266, -0.2296282, 0.21284704, 0.017770419, -0.1722762, -0.1741687, 0.10708671, 0.331979, 0.11924846, -0.09410989, -0.123036265), target2);
	target2 = MulAdd(nb2, MF4x4(-0.096586555, -0.30475244, -0.24065268, 0.053860847, 0.19413544, 0.05542323, -0.06327867, 0.012265184, -0.08913778, 0.13779551, -0.099127166, 0.007493773, -0.07125554, -0.0011684593, -0.003005287, -0.094847135), target2);
	target2 = MulAdd(nc2, MF4x4(0.21711998, -0.13086027, 0.07825239, -0.21121782, 0.055840425, -0.0019166623, -0.05480048, 0.019817038, 0.007626905, 0.14126389, 0.04515749, -0.029315706, 0.18555732, -0.114861906, -0.21993469, 0.031716693), target2);
	target2 = MulAdd(nd2, MF4x4(-0.06716353, -0.11964145, 0.09711908, -0.061763637, -0.0948045, 0.14189975, 0.2810092, 0.2505306, 0.08872909, 0.086749084, -0.17528322, -0.048835423, 0.124959685, -0.12602286, 0.065660164, -0.06783225), target2);
	target2 = MulAdd(ne2, MF4x4(-0.23066516, -0.0068310793, -0.0021060852, 0.09136854, 0.09919007, 0.2259628, -0.026603302, 0.1367709, -0.07940821, 0.14962214, 0.00652088, -0.3114987, -0.18900892, -0.20450105, 0.09329685, -0.19482759), target2);
	target2 = MulAdd(nf2, MF4x4(0.095197074, 0.06346413, -0.05207484, -0.086378016, 0.19733003, 0.1448027, -0.02410627, 0.024829205, -0.20296144, -0.09551166, 0.022987023, 0.09035918, -0.15824226, 0.1350293, -0.06641893, 0.11739518), target2);
	target2 = MulAdd(ng2, MF4x4(0.08381447, -0.13171835, -0.030271608, 0.14649504, 0.0007350431, 0.15303299, -0.001797464, 0.30294403, -0.07635094, -0.102541, -0.12176348, 0.053775523, 0.08070882, -0.035387367, -0.09521456, 0.22530125), target2);
	target2 = MulAdd(nh2, MF4x4(-0.04650126, 0.12029137, 0.009236626, -0.1371486, -0.119391896, 0.20490645, 0.17123316, -0.015455403, 0.05842872, 0.14354227, 0.37586045, 0.054906923, 0.062954046, 0.07285954, 0.12260665, -0.08675996), target2);
	target2 = MulAdd(ni2, MF4x4(0.22510684, -0.010087092, 0.005660375, 0.05069907, 0.10297958, 0.1411009, 0.09538159, 0.00922383, -0.31313825, -0.06449414, 0.109746836, 0.30148697, 0.35861742, -0.045380104, 0.09908991, -0.1933117), target2);

	MF3 target3 = tex5.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(-0.02302231, -0.035528302, -0.030674051, 0.029780716, 0.031591274, 0.045867007, 0.01335752, 0.037001595, 0.04351411, -0.11126892, 0.038589563, 0.06444906), target3);
	target3 = MulAdd(e2, MF4x3(0.0047764573, -0.063372664, -0.065609895, 0.0478139, 0.025694113, 0.025097322, -0.1019169, 0.029989049, 0.050038517, 0.07504127, -0.017047737, -0.026222635), target3);
	target3 = MulAdd(ne1, MF4x3(0.0024485083, 0.00640911, 0.008171829, -0.014622121, -0.06078096, -0.0800138, -0.0062360805, -0.014344496, -0.021332184, 0.117842786, -0.103745885, -0.13756834), target3);
	target3 = MulAdd(ne2, MF4x3(-0.01942775, 0.08720701, 0.104858086, -0.05545872, -0.041375194, -0.035368554, 0.080331706, -0.021207837, -0.043905254, -0.12515299, 3.445463e-05, 0.018742712), target3);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
	tex6[gxy] = MF4(target3, 1);
}

//!PASS 7
//!DESC Conv-4x3x3x16
//!IN tex3, tex4, tex6
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass7(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { 0.013113342, -0.2905848, -0.029724011, 0.1769613 };
	target1 = MulAdd(a1, MF4x4(-0.04279202, -0.01698567, 0.18318103, -0.18172316, 0.04757184, 0.07232096, -0.054900512, 0.11956132, 0.048900753, 0.0006714882, -0.09200336, 0.16104606, 0.38940707, 0.2754208, -0.12735553, -0.30017206), target1);
	target1 = MulAdd(b1, MF4x4(0.2469705, 0.103162065, 0.10321547, -0.1292231, 0.3013039, -0.018333653, -0.19897339, 0.122247696, 0.14719778, 0.003909129, -0.19585025, 0.03670547, -0.2132921, 0.33642963, 0.17569672, 0.07414473), target1);
	target1 = MulAdd(c1, MF4x4(0.015335451, 0.15161209, 0.0447609, -0.042884503, 0.14257035, 0.07775234, -0.2064044, 0.03842874, -0.1660166, -0.19817057, -0.10740875, -0.123968095, 0.14156081, -0.2197906, -0.08622206, 0.4185408), target1);
	target1 = MulAdd(d1, MF4x4(-0.33392438, -0.12483512, -0.062084857, 0.16336447, 0.09862199, 0.1659862, 0.034751434, -0.11968266, -0.017155796, 0.21001562, -0.053017724, 0.10386376, 0.07066254, 0.50014263, 0.31065208, -0.026068505), target1);
	target1 = MulAdd(e1, MF4x4(-0.34320992, -0.030056434, -0.24118581, -0.024320357, 0.327435, -0.036838267, -0.19433706, 0.24561343, -0.1489437, 0.225435, 0.18421564, 0.021147838, 0.264245, 0.16846146, -0.51724315, 0.039252095), target1);
	target1 = MulAdd(f1, MF4x4(-0.25945047, 0.12058094, 0.2889452, -0.061687145, -0.10309796, -0.19476385, -0.10393912, 0.16837607, -0.05198191, -0.036113493, -0.11847194, 0.16367626, 0.018113747, 0.059499823, 0.0062132217, 0.15846115), target1);
	target1 = MulAdd(g1, MF4x4(0.094601326, 0.053219795, 0.027610637, 0.12041253, 0.21425363, 0.15754686, 0.08518286, -0.00661778, -0.021661628, -0.17554528, -0.014842315, 0.22240937, 0.15908821, -0.20964032, 0.21754523, 0.30307937), target1);
	target1 = MulAdd(h1, MF4x4(0.13757955, 0.06684095, -0.03616685, -0.014618309, 0.04168136, -0.17148526, -0.16317028, 0.14210777, 0.102521434, -0.19108291, -0.14441934, 0.14435884, 0.24228935, -0.10589834, 0.24029285, 0.27317202), target1);
	target1 = MulAdd(i1, MF4x4(-0.16239886, -0.073841535, 0.067964345, -0.11332664, 0.07695667, -0.047180675, -0.08260769, 0.09427637, 0.09471068, 0.012713836, 0.14605078, -0.062490974, -0.11498225, 0.04150893, 0.37402585, 0.21953487), target1);
	target1 = MulAdd(a2, MF4x4(-0.07445113, -0.14220217, 0.09271495, -0.014715529, -0.37606132, -0.14938155, -0.024809113, 0.22279873, -0.011379667, -0.04545505, -0.033382278, 0.08971831, 0.016359061, -0.016230864, 0.052939463, -0.07754285), target1);
	target1 = MulAdd(b2, MF4x4(0.10961948, 0.09230085, 0.061259165, 0.0015837378, 0.053883027, -0.22557226, 0.018400123, 0.43234614, 0.08967873, 0.06687854, -0.4389578, -0.01658211, -0.040707946, 0.0048945122, 0.1433802, 0.049759727), target1);
	target1 = MulAdd(c2, MF4x4(-0.027641231, 0.026085567, 0.109188825, -0.19011945, 0.19309571, 0.0084956605, 0.05034047, -0.08674781, -0.008124587, 0.031490494, -0.0744263, 0.084508896, -0.007835403, 0.13120581, 0.0021786217, -0.025225073), target1);
	target1 = MulAdd(d2, MF4x4(0.020191731, 0.24703082, -0.36845222, 0.0032569442, -0.1497622, 0.05968502, 0.09595371, 0.008410154, 0.119981945, -0.09983294, -0.19541258, -0.111814305, -0.25664008, 0.31031236, -0.23063917, -0.13823026), target1);
	target1 = MulAdd(e2, MF4x4(-0.092747286, 0.23009373, -0.29804415, 0.05036082, 0.031480987, 0.18805481, 0.3676576, 0.06004687, 0.19841099, -0.058367446, -0.44229323, -0.19645047, 0.037667975, 0.12398346, -0.25753063, -0.26919344), target1);
	target1 = MulAdd(f2, MF4x4(-0.019061154, 0.03841801, -0.28433323, 0.38128456, -0.059526864, 0.29960185, 0.014484517, -0.10234412, 0.05444907, -0.12615138, 0.14936689, -0.079120934, 0.028092088, 0.096715964, 0.0037780635, -0.12791039), target1);
	target1 = MulAdd(g2, MF4x4(0.26949528, 0.015951393, 0.15355164, -0.030336212, -0.100286454, -0.052609976, 0.03197625, -0.092190474, 0.06131517, 0.18291938, -0.15216532, -0.026021928, 0.18581273, -0.10659101, 0.14806952, 0.20509768), target1);
	target1 = MulAdd(h2, MF4x4(-0.2205839, 0.11654808, 0.43800604, 0.03188946, 0.13840868, 0.020377772, 0.038510147, 0.03779825, -0.23494276, 0.08624197, 0.036650848, -0.115041405, -0.03776705, -0.32108167, 0.0094707385, 0.37881464), target1);
	target1 = MulAdd(i2, MF4x4(-0.031778246, -0.38020673, 0.16956653, 0.33444092, -0.042172886, -0.03465591, -0.17585713, 0.025507452, 0.07595919, -0.06807453, -0.100295454, -0.019174794, 0.07763043, -0.09321411, -0.05212223, 0.112239085), target1);
	target1 = MulAdd(na1, MF4x4(-0.048172995, -0.012284629, 0.12846173, -0.13459995, 0.25443402, -0.013064909, 0.15480834, 0.14016332, 0.036635883, -0.049085367, 0.0506487, 0.26623604, -0.023176057, 0.012088936, -0.1844897, 0.040488705), target1);
	target1 = MulAdd(nb1, MF4x4(0.2147455, 0.17323543, -0.2943051, -0.053386763, -0.023367947, 0.090753146, -0.011997397, -0.0626111, -0.13558747, -0.035944186, -0.014752113, 0.25506687, 0.055502877, 0.31465453, -0.16283247, -0.08967175), target1);
	target1 = MulAdd(nc1, MF4x4(0.033773236, -0.09510872, -0.09313707, 0.046486538, -0.1699796, -0.11685979, 0.22197925, -0.013884658, 0.12514, -0.12129843, -0.09695589, -0.075202964, -0.12321221, 0.18949097, -0.03694664, -0.2306249), target1);
	target1 = MulAdd(nd1, MF4x4(0.08668444, -0.22983012, -0.30873656, 0.07371376, 0.082137264, -0.014844924, 0.2283955, 0.24782042, 0.31113505, 0.14810014, 0.32804835, -0.12014127, -0.17742543, -0.15872951, -0.080107674, -0.16898526), target1);
	target1 = MulAdd(ne1, MF4x4(0.29746926, 0.19479977, 0.13996765, -0.4268552, -0.16478531, 0.0835479, 0.45685142, -0.05510062, -0.1282004, 0.12359051, 0.34026766, -0.26152933, -0.13128015, 0.329812, 0.27172327, -0.06600192), target1);
	target1 = MulAdd(nf1, MF4x4(-0.06552484, 0.19600633, 0.12407863, -0.13815112, 0.17426166, 0.040930413, 0.06495108, 0.034157254, -0.029772963, 0.015127817, 0.10718436, -0.13752984, -0.0205358, 0.1884735, 0.104591034, -0.020779913), target1);
	target1 = MulAdd(ng1, MF4x4(-0.053475305, -0.13616458, 0.05487909, 0.13256747, -0.10030239, -0.12376705, 0.062755466, 0.03264356, 0.068466686, 0.05019395, -0.034875803, -0.17806669, -0.21720818, 0.25592342, -0.2685692, -0.27576914), target1);
	target1 = MulAdd(nh1, MF4x4(-0.04562929, 0.04225299, -0.22311088, -0.09517893, -0.19886662, -0.11944208, 0.11044239, -0.10464355, 0.037634842, 0.124069214, 0.0927385, 0.108838566, -0.088783056, 0.17008123, -0.1007014, -0.23137446), target1);
	target1 = MulAdd(ni1, MF4x4(0.10306672, 0.027472405, -0.069015354, -0.14412996, 0.24068132, -0.10624665, -0.25597134, 0.05208812, -0.10230778, 0.006520562, -0.11931577, 0.26738268, -0.09168354, 0.13557245, -0.008878644, -0.22292739), target1);
	target1 = MulAdd(na2, MF4x4(-0.09403718, 0.11993688, -0.036254726, -0.053109076, 0.18422048, 0.25203657, 0.10025996, -0.11272799, -0.22040273, -0.05758331, -0.07059054, -0.054108664, -0.20009018, -0.22061199, 0.057880517, -0.26669186), target1);
	target1 = MulAdd(nb2, MF4x4(-0.08534496, 0.0027822452, -0.01112169, -0.13484463, -0.09446875, -0.057457812, -0.03910888, -0.2816038, -0.096015625, -0.03636662, 0.12532772, 0.092033, 0.038156748, -0.101240925, 0.024886698, -0.086328045), target1);
	target1 = MulAdd(nc2, MF4x4(0.2349796, 0.19884427, -0.0734711, 0.08422328, -0.07201622, 0.020658491, 0.1331021, 0.039766714, 0.19280422, 0.13086005, -0.11339721, -0.14782044, 0.19341573, 0.16767374, -0.03593828, 0.18139753), target1);
	target1 = MulAdd(nd2, MF4x4(-0.040663462, -0.15233721, 0.524604, 0.26603413, 0.07202415, 0.053382196, 0.030758869, -0.06144292, -0.010495834, 0.13868876, -0.020688854, -0.15551737, -0.2958513, -0.32805985, -0.25359175, -0.036683984), target1);
	target1 = MulAdd(ne2, MF4x4(-0.06644081, -0.145321, 0.24945419, 0.031560224, 0.17245345, 0.23418438, 0.20341763, -0.2619872, 0.038787205, 0.16488725, 0.0019107185, 0.03820528, 0.04169643, -0.34155026, -0.11183654, 0.028614044), target1);
	target1 = MulAdd(nf2, MF4x4(-0.028469078, 0.010781976, 0.05263661, -0.15337946, -0.20491667, -0.13879907, 0.13934538, 0.061196275, 0.056804053, 0.063193604, -0.2389496, 0.037072126, -0.058510017, 0.036215063, 0.3074709, 0.10517675), target1);
	target1 = MulAdd(ng2, MF4x4(0.028534278, 0.0022668538, 0.04492863, -0.060705435, 0.06349762, -0.016823182, -0.09148226, 0.03930522, -0.083295114, 0.14799853, -0.08089152, -0.21993661, -0.23298621, 0.05106244, -0.013708201, -0.16311577), target1);
	target1 = MulAdd(nh2, MF4x4(0.05885827, 0.122300275, -0.16086812, -0.21892425, -0.07548077, 0.09286181, -0.027564062, -0.028723463, -0.0056181233, 0.23472206, -0.0049285595, -0.45054138, 0.07592325, -0.044704806, 0.019616256, -0.06956836), target1);
	target1 = MulAdd(ni2, MF4x4(0.036423888, 0.20839189, -0.16420732, -0.15954947, -0.11311323, -0.24191359, 0.19845375, 0.084540576, -0.20946553, 0.09259613, 0.03234368, -0.056766506, -0.11992363, -0.06882079, -0.020428827, -0.093375795), target1);
	
	MF4 target2 = { 0.22705397, -0.029518934, -0.026397338, -0.08183741 };
	target2 = MulAdd(a1, MF4x4(0.093678355, -0.08574688, 0.007699401, -0.038818456, -0.10667588, 0.043627866, 0.23127791, 0.061317544, -0.32790044, 0.08618836, 0.009400048, -0.17129329, 0.23541448, -0.015561885, -0.11172365, -0.1190039), target2);
	target2 = MulAdd(b1, MF4x4(-0.0052874424, 0.08136584, -0.12633958, -0.016064916, 0.14033778, 0.07755252, -0.26242834, 0.063312635, 0.06861756, 0.14867078, -0.2561066, 0.33325562, -0.106489345, -0.10068009, -0.039633382, -0.016305668), target2);
	target2 = MulAdd(c1, MF4x4(-0.27784392, -0.14990395, -0.35981888, -0.2564094, -0.07480205, -0.026457628, 0.1027643, 0.19381845, -0.07160986, -0.15616457, -0.032070953, 0.32998616, 0.15383582, 0.16622585, -0.1435993, -0.02287804), target2);
	target2 = MulAdd(d1, MF4x4(-0.09360053, 0.58019537, 0.02028909, 0.413114, 0.025173154, -0.030326266, -0.028177274, -0.12964654, -0.25432733, -0.06556034, 0.023097439, -0.09458851, -0.21772051, -0.10324596, -0.36674342, -0.14803977), target2);
	target2 = MulAdd(e1, MF4x4(-0.1227467, 0.20252965, 0.2559927, 0.08719227, 0.030749539, -0.2526622, -0.25694713, -0.2960799, -0.34960067, -0.25393236, -0.28439638, 0.086787805, -0.34202877, 0.21933395, 0.23473133, 0.079260886), target2);
	target2 = MulAdd(f1, MF4x4(-0.00147522, -0.16591258, -0.030617915, 0.10052425, -0.1822102, 0.038774874, -0.04285007, 0.07312042, 0.052175622, -0.33510515, 0.027545406, 0.2995306, -0.08535316, 0.11144203, 0.27999434, -0.09770663), target2);
	target2 = MulAdd(g1, MF4x4(-0.04394928, -0.26842886, -0.08354109, 0.04077001, -0.009221606, 0.0328837, 0.006459338, 0.08984004, -0.13035133, 0.20004508, 0.21950854, -0.12742348, 0.32386312, 0.085903555, -0.29273173, -0.056370437), target2);
	target2 = MulAdd(h1, MF4x4(0.019171638, -0.1824711, -0.10899421, -0.16201603, 0.054712642, -0.020315547, -0.048609916, -0.068621606, -0.055706583, -0.25671515, -0.019494208, 0.08366393, 0.09531471, -0.05988052, -0.024995802, 0.019303525), target2);
	target2 = MulAdd(i1, MF4x4(-0.08694609, 0.26762635, 0.10477892, -0.15392998, -0.059596587, -0.047562487, -0.25932398, -0.054960977, -0.00015596532, 0.07196634, -0.017385524, -0.18826845, -0.017969077, -0.27291682, -0.153906, -0.107691295), target2);
	target2 = MulAdd(a2, MF4x4(0.17340474, -0.1285696, -0.04484238, 0.15782213, -0.06190358, 0.27896214, 0.28475145, -0.042519942, -0.19862229, -0.1354097, 0.14344497, 0.015599392, 0.18698554, 0.035121564, -0.018465763, 0.0010143917), target2);
	target2 = MulAdd(b2, MF4x4(-0.13428356, -0.06612225, 0.19397905, 0.14209093, 0.1526626, 0.2617573, -0.15316434, 0.35452205, 0.05003259, 0.07679617, -0.008399171, -0.0062716682, 0.11833864, 0.1331285, -0.006803729, 0.22615404), target2);
	target2 = MulAdd(c2, MF4x4(0.0020632436, -0.173174, -0.15404437, 0.05430569, 0.21100305, 0.39063898, -0.019479724, 0.17396629, -0.061121427, -0.13424753, -0.008459669, -0.04975768, 0.20599939, -0.11374013, -0.21116278, 0.063624285), target2);
	target2 = MulAdd(d2, MF4x4(-0.0073831948, -0.12009769, -0.16402034, 0.054093774, 0.061061747, -0.009054565, -0.02815144, -0.17071937, -0.22791979, 0.073427565, 0.25161973, 0.1011713, -0.23804636, 0.13810354, 0.09063126, -0.23065178), target2);
	target2 = MulAdd(e2, MF4x4(-0.31885087, 0.21730177, -0.20516786, 0.04075695, -0.2736768, -0.38779113, -0.19445951, -0.14024325, -0.11824961, -0.102919355, -0.17858729, -0.013441498, 0.16320607, -0.27105078, -0.00019549616, 0.024509901), target2);
	target2 = MulAdd(f2, MF4x4(-0.16024838, -0.3132909, -0.15461555, 0.34874174, -0.0051668375, 0.1811257, 0.3384939, 0.16381103, 0.047184363, -0.20424844, -0.1330078, -0.13795874, 0.21890834, -0.08242861, 0.22677775, 0.031102268), target2);
	target2 = MulAdd(g2, MF4x4(0.19408257, 0.016361775, -0.202373, 0.2245766, -0.008954751, -0.047279913, -0.09170596, 0.01567793, -0.0019059096, -0.07785436, 0.0756357, 0.09683383, 0.034215495, -0.030802004, -0.077977195, -0.1101297), target2);
	target2 = MulAdd(h2, MF4x4(-0.1060503, -0.0044663083, -0.14942732, -0.11696249, -0.04550482, 0.11463188, 0.17801443, 0.07229662, -0.14176941, 0.02773344, -0.10770335, -0.08745911, -0.023052111, -0.17474785, 0.016645849, -0.059080444), target2);
	target2 = MulAdd(i2, MF4x4(-0.050500304, -0.14716387, 0.04525464, 0.23543595, 0.08411192, 0.16031684, 0.1659825, -0.03595111, -0.012943453, 0.13354135, -0.051425032, -0.0075654723, 0.11174184, 0.1266808, -0.18799087, 0.10571744), target2);
	target2 = MulAdd(na1, MF4x4(-0.15583408, 0.09837484, 0.19239932, -0.03557196, -0.05406335, 0.096456856, -0.13921897, -0.2212671, 0.28973594, 0.04017474, -0.25423512, 0.1522156, -0.10563249, -0.033190794, 0.101713456, -0.08922746), target2);
	target2 = MulAdd(nb1, MF4x4(-0.0787607, -0.14545321, 0.099762656, -0.2824299, 0.10130184, 0.019948835, -0.1013831, 0.06604923, 0.089561954, 0.28344154, 0.05757009, 0.04981809, -0.15927236, 0.008129835, -0.04280382, 0.10653281), target2);
	target2 = MulAdd(nc1, MF4x4(0.28149363, 0.019583186, 0.25983065, 0.30190885, 0.055435803, -0.01970755, 0.04546505, -0.027456624, 0.43886992, -0.032305803, -0.23557569, 0.12753153, -0.18509789, -0.073295385, 0.0083466545, -0.08271229), target2);
	target2 = MulAdd(nd1, MF4x4(0.016040009, -0.20475672, -0.015803276, 0.18247975, 0.21178837, -0.041543446, -0.24716362, 0.10105528, 0.19479224, -0.06583694, -0.09192672, -0.037776746, 0.09636229, -0.12086331, 0.13989103, 0.014564729), target2);
	target2 = MulAdd(ne1, MF4x4(0.19923596, -0.4132588, -0.4254784, -0.33433357, -0.16956097, -0.25086832, 0.23311833, -0.08976422, 0.06432824, -0.0071802614, 0.0033370545, -0.11073493, -0.46609998, -0.09332235, -0.27287352, 0.052513942), target2);
	target2 = MulAdd(nf1, MF4x4(-0.06954148, -0.06908355, -0.01875471, -0.35067585, 0.038715206, 0.08843527, 0.28899097, -0.024983376, 0.05879495, 0.110363334, 0.055481512, -0.0046147215, -0.035302363, -0.2722019, -0.0829261, 0.21088009), target2);
	target2 = MulAdd(ng1, MF4x4(-0.101971015, -0.18584369, 0.1469676, 0.025965, 0.07205807, 0.08838771, 0.08537094, 0.023344917, -0.106373414, -0.09254277, -0.25996596, 0.24570447, 0.00590166, -0.20074098, -0.05443169, -0.10562662), target2);
	target2 = MulAdd(nh1, MF4x4(0.12980327, -0.16834956, -0.1635997, 0.23437372, -0.07374834, 0.0062907683, 0.17292136, 0.0018093853, 0.04122969, -0.025285576, 0.29646805, 0.13402736, -0.040267725, 0.0011441729, -0.18658921, 0.12006417), target2);
	target2 = MulAdd(ni1, MF4x4(0.13221453, 0.15109141, 0.07707579, 0.05148666, -0.039716493, 0.12869143, -0.012840577, 0.10953536, -0.05721115, -0.120122276, -0.07632444, 0.32949027, 0.00022400127, 0.22217369, 0.2180494, -0.028773604), target2);
	target2 = MulAdd(na2, MF4x4(-0.08405412, 0.11332542, 0.120847605, 0.00520135, -0.13689686, -0.1459117, -0.029643068, 0.16147274, 0.21844815, -0.036921967, -0.12862785, -0.15930249, -0.11265427, -0.17471205, 0.0026749703, 0.2048758), target2);
	target2 = MulAdd(nb2, MF4x4(-0.03768306, -0.07585988, 0.046583172, -0.35557657, 0.012359812, -0.05498573, 0.19581361, -0.08186999, -0.008727976, -0.16623624, -0.03647879, 0.22760212, 0.048297524, -0.12502927, 0.08636729, -0.26437047), target2);
	target2 = MulAdd(nc2, MF4x4(-0.19518375, 0.17423135, 0.19473018, -0.22721744, -0.25087392, -0.17043075, -0.021999557, -0.27388734, -0.096786864, -0.012226921, 0.16101876, 0.030362492, -0.017619403, -0.2494354, -0.07336028, 0.06842719), target2);
	target2 = MulAdd(nd2, MF4x4(0.13816363, 0.14551367, -0.08497621, 0.15563537, -0.01600614, -0.010629245, 0.007773828, 0.2733634, 0.13066974, -0.2223056, -0.12664202, -0.19242655, -0.13211249, 0.065143794, 0.23912583, 0.19819915), target2);
	target2 = MulAdd(ne2, MF4x4(0.001870705, -0.0028601827, 0.14014813, 0.14659253, -0.037523735, 0.3726274, 0.13139205, 0.0112125, -0.16308945, -0.17571904, 0.12799808, -0.032106552, 0.013872656, 0.432307, -0.14197885, 0.24013121), target2);
	target2 = MulAdd(nf2, MF4x4(0.117900506, -0.08039036, -0.17504077, -0.08337764, -0.0068703834, -0.07430392, -0.17125578, -0.3470726, -0.20989974, -0.019394008, -0.027336912, 0.18668686, 0.052886557, -0.023217537, 0.004054446, 0.055974416), target2);
	target2 = MulAdd(ng2, MF4x4(-0.055653654, 0.08726097, 0.01206228, -0.25783783, -0.08736529, 0.19947968, -0.010166337, 0.36168414, 0.20298903, -0.15769973, -0.21389212, -0.19638214, -0.093130395, -0.067289785, 0.10245741, -0.14167903), target2);
	target2 = MulAdd(nh2, MF4x4(0.04559992, -0.102125205, 0.21949212, -0.07308472, -0.15511832, 0.23785073, 0.04275021, 0.085007004, 0.079402514, 0.10851189, -0.151969, -0.29738536, -0.0776658, 0.1113102, -0.18987878, -0.045522977), target2);
	target2 = MulAdd(ni2, MF4x4(0.073690206, -0.016468357, 0.122353435, -0.023995928, 0.095143944, 0.23051415, 0.17702249, 0.030164838, -0.09111423, -0.14219609, -0.19734482, -0.24854833, -0.0067356345, -0.1760497, 0.22637916, 0.119141534), target2);

	MF3 target3 = tex6.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.013106969, 0.010379314, 0.012753471, 0.07086715, -0.020893, -0.03968904, -0.06114372, 0.029510446, 0.035070244, 0.11180839, -0.087067656, -0.124039896), target3);
	target3 = MulAdd(e2, MF4x3(-0.056521703, -0.001166792, -2.3704073e-05, 0.011961608, 0.01848977, 0.019861937, 0.012167056, 0.018613879, 0.020505793, 0.009734187, -0.0308419, -0.035206888), target3);
	target3 = MulAdd(ne1, MF4x3(0.0048758825, 0.018046578, 0.014597015, -0.061724614, 0.040989272, 0.05644141, 0.070315465, 0.008318584, 0.0028647361, -0.11316492, 0.043919202, 0.07653594), target3);
	target3 = MulAdd(ne2, MF4x3(0.031487904, -0.010548384, -0.009984509, -0.0022647562, 0.0043304027, 0.0029451603, -0.0063251094, -0.013420807, -0.011919729, -0.022760967, 0.019141173, 0.01782793), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}

//!PASS 8
//!DESC Conv-4x3x3x16, Conv-3x1x1x112
//!IN INPUT, tex1, tex2, tex5
//!OUT OUTPUT
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass8(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	
	const uint2 outputSize = GetOutputSize();
	if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.20551574, 0.073114716, -0.21843387, -0.28057778 };
	target1 = MulAdd(a1, MF4x4(0.16737834, 0.35369134, 0.14049083, 0.017871622, 0.0058661173, -0.035960242, -0.039154284, -0.01920433, 0.0729212, -0.03617972, -0.42717552, -0.019914677, -0.30816802, -0.07726792, 0.2088459, -0.09198307), target1);
	target1 = MulAdd(b1, MF4x4(-0.0991125, 0.11411345, 0.15300295, -0.09510225, 0.014268626, -0.42914182, -0.13365223, -0.19440699, -0.27214321, 0.085696176, 0.1527733, -0.21056797, -0.062475704, -0.023041902, -0.29080424, -0.54386055), target1);
	target1 = MulAdd(c1, MF4x4(-0.30736786, -0.16801229, 0.07400606, -0.31128535, -0.11047924, 0.16556956, -0.33445996, -0.09190697, -0.06132585, -0.11021996, 0.014628762, -0.45183894, 0.08186993, 0.19378273, 0.113438204, 0.038364496), target1);
	target1 = MulAdd(d1, MF4x4(0.24129803, 0.29174972, -0.1250327, 0.14254767, 0.0026774528, 0.1742466, -0.021835174, 0.01668921, 0.13646975, 0.313305, -0.23293279, -0.16737306, -0.059818722, 0.06404477, 0.108172625, 0.22065729), target1);
	target1 = MulAdd(e1, MF4x4(-0.3504013, 0.20759478, 0.28683922, 0.2771802, 0.13761812, -0.21180478, -0.17020214, -0.21419087, -0.031916566, -0.040439468, 0.39206958, 0.715565, 0.46198523, 0.05055317, -0.07409331, -0.050633535), target1);
	target1 = MulAdd(f1, MF4x4(0.122958206, 0.0071205017, -0.21314384, -0.22197853, 0.016202174, -0.15960938, -0.14601983, -0.023609173, -0.07586023, 0.099936776, -0.0480375, -0.08681468, -0.14976887, -0.38979456, 0.16078879, -0.12263952), target1);
	target1 = MulAdd(g1, MF4x4(0.1687149, 0.108331114, 0.10112296, 0.01738403, -0.06773097, -0.19410455, -0.09728116, 0.0013846151, -0.038603816, -0.05495021, 0.2453317, -0.40052003, -0.022453755, 0.045039784, 0.0474246, -0.2665161), target1);
	target1 = MulAdd(h1, MF4x4(0.06805519, -0.052276067, 0.052459523, -0.0033053474, 0.13439268, -0.06845637, -0.20462433, -0.09088968, -0.00096404477, -0.35103628, 0.15096465, 0.3285226, 0.018747555, -0.06623108, 0.1754265, 0.3211156), target1);
	target1 = MulAdd(i1, MF4x4(-0.04583627, 0.122267574, -0.44002235, -0.20039988, 0.039372742, -0.16505809, -0.26659602, 0.12207268, 0.03337428, 0.23131758, -0.009866899, 0.010381569, 0.29676, -0.020599596, 0.17816995, 0.32852224), target1);
	target1 = MulAdd(a2, MF4x4(0.09469788, -0.12531966, -0.11786524, -0.3115985, -0.2213199, -0.012536277, -0.13176842, 0.14986996, 0.12069894, 0.2744789, 0.21674646, 0.46060535, -0.4101697, -0.55295914, 0.29993954, 0.114459395), target1);
	target1 = MulAdd(b2, MF4x4(0.18347421, -0.29010707, 0.29127017, 0.087738656, 0.17509815, 0.03982794, 0.1731455, 0.38041735, 0.110374, -0.25045586, 0.36446962, 0.016104888, -0.012112869, 0.10154983, -0.45384112, -0.11416608), target1);
	target1 = MulAdd(c2, MF4x4(-0.033837743, -0.020894403, -0.287127, -0.21196121, -0.03255823, 0.2599821, -0.38386443, 0.30563655, 0.39044768, -0.112917066, -0.021323297, 0.12623324, 0.06885038, -0.20750642, 0.07642818, -0.103580445), target1);
	target1 = MulAdd(d2, MF4x4(0.1723114, -0.3726216, -0.21184283, 0.1761503, -0.24993578, -0.31068864, 0.19998416, -0.23127908, -0.052656204, -0.04243976, 0.4397144, 0.01863219, -0.04796025, -0.11009142, -0.0073631364, 0.2716381), target1);
	target1 = MulAdd(e2, MF4x4(0.04202001, 0.27142277, -0.027491128, 0.27428457, -0.11009916, 0.39839938, -0.7223327, -0.124673314, 0.08123618, -0.11884722, -0.20375855, -0.7179687, 0.30648115, -0.28195357, -0.3350774, -0.29778734), target1);
	target1 = MulAdd(f2, MF4x4(0.071278594, -0.09155223, 0.06417857, 0.08250104, -0.45117077, -0.023316784, 0.38917172, -0.19110887, -0.09265943, -0.2643835, -0.09707039, -0.33238646, -0.0818088, 0.17623149, -0.28457013, 0.13986786), target1);
	target1 = MulAdd(g2, MF4x4(0.019971045, -0.046649583, -0.03036858, 0.07944429, 0.26344573, 0.054998036, 0.07139812, 0.21139374, 0.08021858, -0.025791258, -0.0423707, 0.25174072, -0.021300986, 0.13209766, 0.19120613, 0.3840775), target1);
	target1 = MulAdd(h2, MF4x4(-0.11456406, -0.33503455, 0.21409267, -0.056933913, -0.12204284, -0.37379473, 0.33474764, 0.38634798, 0.12618992, 0.1353635, -0.22651522, -0.3160159, 0.18621005, 0.024818055, -0.11935204, 0.014005666), target1);
	target1 = MulAdd(i2, MF4x4(0.1501391, 0.0014716414, -0.22049955, -0.10928345, -0.07085164, -0.08778668, 0.19251469, -0.4932493, 0.071784936, -0.06903646, -0.060333923, 0.020552203, -0.33637995, -0.22848415, 0.21518159, 0.23815839), target1);
	target1 = MulAdd(na1, MF4x4(-0.04230713, -0.19312756, -0.0613665, 0.058912925, -0.17639293, -0.029920885, -0.027867602, -0.16602923, 0.10262268, -0.0743682, 0.15286638, 0.08042581, -0.042299524, 0.0022034592, 0.15304253, 0.049871147), target1);
	target1 = MulAdd(nb1, MF4x4(0.004346093, -0.07895582, 0.02089975, 0.13429636, -0.1020282, 0.5270822, 0.017983409, 0.1531299, -0.02891241, -0.07050933, -0.18729019, 0.13855362, -0.11538968, 0.20733222, 0.1546878, 0.11550679), target1);
	target1 = MulAdd(nc1, MF4x4(0.21800312, 0.20944421, -0.1817274, 0.022868395, -0.019241469, 0.038916696, 0.088702604, 0.1467791, 0.0048542274, 0.10344671, -0.0107803065, 0.23302868, 0.049728952, -0.016042534, -0.08694045, -0.0028224774), target1);
	target1 = MulAdd(nd1, MF4x4(-0.1570157, 0.08688841, 0.03926086, -0.040503077, -0.052700017, -0.1432353, -0.04516745, -0.09649034, -0.053716175, 0.07059194, -0.07360609, 0.26307717, 0.121471435, -0.13640986, -0.1113535, -0.38560814), target1);
	target1 = MulAdd(ne1, MF4x4(-0.014722592, -0.39773384, 0.28259715, -0.10905738, 0.07889424, 0.1415529, -0.15419348, -0.2064834, -0.15126482, -0.28288555, -0.0014232624, -0.26178944, -0.025823193, 0.008017357, -0.08547297, 0.26373458), target1);
	target1 = MulAdd(nf1, MF4x4(0.2978961, -0.020236012, -0.101216674, 0.15498216, -0.0069343713, -0.088363856, 0.20511419, 0.23958007, 0.045810107, -0.19189738, -0.14137349, 0.04177724, -0.1394684, 0.0071990825, 0.06991723, -0.21052721), target1);
	target1 = MulAdd(ng1, MF4x4(-0.05615232, 0.22506002, -0.12479586, -0.0070057763, 0.092545755, 0.096306436, 0.041890718, 0.1226944, -0.07541768, -0.08369033, -0.15144373, 0.09310172, 0.28388003, 0.09935607, 0.11299509, 0.0014283776), target1);
	target1 = MulAdd(nh1, MF4x4(-0.005848455, 0.117699094, 0.23539856, 0.11006195, 0.10962903, 0.28139547, 0.18785141, -0.11635996, 0.057289902, 0.2370178, -0.29825503, -0.13706475, -0.3869794, 0.024066223, 0.36742347, 0.35919484), target1);
	target1 = MulAdd(ni1, MF4x4(0.13744523, 0.09239356, 0.01173183, 0.119055405, -0.07841836, 0.0668925, 0.22598477, -0.016510552, 0.07971727, -0.17154713, 0.03333588, -0.13790733, 0.15421963, 0.2895701, -0.28440917, 0.015132756), target1);
	target1 = MulAdd(na2, MF4x4(-0.054354303, 0.36663428, 0.02634933, 0.18688667, 0.0607547, 0.17321853, 0.086784445, -0.023283, 0.0027200899, 0.026914112, -0.07438439, 0.27042162, 0.09985293, 0.012430832, -0.20694605, -0.20363812), target1);
	target1 = MulAdd(nb2, MF4x4(-0.42759168, 0.15540305, -0.18979609, 0.0073875943, 0.034251947, -0.34551802, 0.53327596, 0.17446762, -0.25879666, 0.2780996, 0.11094055, 0.17597, 0.13790102, 0.2615357, 0.09666047, 0.36155468), target1);
	target1 = MulAdd(nc2, MF4x4(0.052614138, -0.1880028, 0.361331, 0.07957976, 0.12552904, -0.0042941784, 0.096562445, -0.041199915, 0.07412456, 0.16379668, 0.05464284, 0.050022952, -0.028281605, 0.09332573, 0.21379845, 0.21396561), target1);
	target1 = MulAdd(nd2, MF4x4(-0.07546953, 0.16393837, -0.3060623, -0.64610606, -0.013715101, 0.18005042, 0.045286633, -0.21057944, -0.12779316, -0.10310629, 0.14360385, 0.011625261, 0.05597252, 0.023864657, -0.00018915108, -0.24224915), target1);
	target1 = MulAdd(ne2, MF4x4(-0.08550672, 0.2438917, -0.30383766, -0.2463794, 0.13835424, -0.079946786, -0.060197506, 0.051599402, -0.24983203, -0.06691107, -0.0041784844, 0.07539119, -0.030340329, -0.23565106, -0.17968354, -0.10262371), target1);
	target1 = MulAdd(nf2, MF4x4(0.19315718, -0.045718513, 0.120446794, -0.225136, 0.22922774, -0.046026126, 0.11448238, 0.114267804, -0.22327735, -0.03368635, 0.29763463, 0.03673529, -0.0583939, -0.092253424, 0.045279544, 0.04475646), target1);
	target1 = MulAdd(ng2, MF4x4(-0.062286656, -0.06241419, -0.23600577, -0.24818502, -0.058666106, 0.17710151, -0.1751668, 0.05758226, 0.18278669, 0.033297777, 0.046349872, 0.09178792, -0.0745512, 0.20019765, 0.037281513, 0.22204825), target1);
	target1 = MulAdd(nh2, MF4x4(-0.24708512, -0.1318695, -0.24966322, -0.31206796, 0.079176836, 0.11837155, -0.12882641, -0.01013533, -0.009065797, 0.0789075, 0.016151598, 0.00020127615, 0.1450729, 0.10825556, 0.09322918, 0.07283566), target1);
	target1 = MulAdd(ni2, MF4x4(0.2604332, 0.25550258, 0.07709474, 0.28426003, 0.10387355, 0.09152259, 0.18742633, -0.0073229484, -0.20327723, -0.26013616, 0.055792782, -0.1713302, 0.14862068, 0.06698207, 0.17608787, -0.11622757), target1);
	
	MF4 target2 = { 0.31939298, 0.03303962, -0.010749771, 0.084496394 };
	target2 = MulAdd(a1, MF4x4(-0.18413043, -0.12355504, 0.2708789, 0.17259507, -0.069752574, 0.12640886, 0.01075919, -0.028221423, -0.020598855, -0.17259665, 0.16907778, -0.10040477, 0.017177016, 0.0176426, 0.23724149, 0.14657862), target2);
	target2 = MulAdd(b1, MF4x4(0.16921899, -0.33950835, 0.37508205, 0.09996622, 0.13377811, -0.036743056, -0.11633877, -0.23046862, -0.009307903, 0.027441062, 0.054166224, 0.011627087, -0.22831611, 0.043198805, -0.12695734, 0.0062862337), target2);
	target2 = MulAdd(c1, MF4x4(0.17216596, -0.15588646, -0.14179194, 0.12487524, 0.10507964, 0.124544986, -0.0046104924, -0.116668865, -0.006100901, -0.022074439, 0.03376759, 0.10498887, 0.109659016, -0.03567928, 0.29972833, -0.045950003), target2);
	target2 = MulAdd(d1, MF4x4(-0.29127, 0.21912472, 0.16494286, 0.027708547, 0.043136686, 0.04409876, -0.07686145, -0.13180132, -0.16630307, 0.15650205, -0.005864527, 0.03916553, 0.15750135, 0.1705246, 0.21626697, 0.06906506), target2);
	target2 = MulAdd(e1, MF4x4(0.055395894, 0.28228188, 0.114794776, 0.020619212, -0.031812593, 0.11964309, -0.24317431, -0.36277202, 0.54564184, -0.032843567, -0.118973784, -0.40999004, -0.118530475, 0.09256661, 0.06583871, -0.36627474), target2);
	target2 = MulAdd(f1, MF4x4(0.17914769, 0.33976436, -0.11220768, 0.1325754, 0.40586957, 0.3064959, -0.19086123, 0.014164092, -0.17376979, -0.0037554938, 0.11771888, 0.44933778, -0.15937245, -0.10635065, 0.084963776, 0.14630255), target2);
	target2 = MulAdd(g1, MF4x4(-0.3723194, 0.21509883, 0.020062352, 0.094394304, 0.030794155, -0.11394617, -0.09103134, -0.0042343247, -0.28981096, -0.061873477, -0.17772584, 0.36440176, 0.007828069, -0.012121627, 0.25862312, 0.24646287), target2);
	target2 = MulAdd(h1, MF4x4(0.10368119, -0.06185447, -0.022830853, 0.10918094, 0.18888599, -0.09235343, -0.055134308, -0.2210923, 0.15334128, -0.3084707, 0.31606838, 0.39931116, 0.29489174, -0.24794856, -0.4799932, -0.2617589), target2);
	target2 = MulAdd(i1, MF4x4(0.32550937, -0.17103608, 0.3257806, -0.23358762, 0.20370598, 0.13325407, -0.020303056, -0.105462655, -0.22264756, -0.034177396, 0.36885822, 0.20504399, 0.36375418, -0.26149705, 0.022433946, 0.15646128), target2);
	target2 = MulAdd(a2, MF4x4(0.007481421, 0.005642636, -0.170087, -0.08915849, 0.6329519, 0.06880098, -0.20856442, -0.1801066, -0.1342754, 0.13643123, 0.26994216, -0.27503812, 0.018052012, 0.058687408, -0.19784917, 0.021157453), target2);
	target2 = MulAdd(b2, MF4x4(-0.1486918, 0.12212738, -0.03104796, 0.08664756, 0.3464865, 0.27309546, -0.022896903, -0.32080007, -0.28113958, 0.74847424, -0.33735126, -0.04616876, -0.23119605, 0.4214322, -0.16457441, 0.09162191), target2);
	target2 = MulAdd(c2, MF4x4(0.15863913, 0.1303683, -0.06339421, 0.06328312, -0.3100047, -0.33906308, 0.13805804, -0.14923394, 0.4997829, -0.14977637, 0.02265068, -0.04585939, 0.29802153, 0.3767994, -0.031849556, -0.051892217), target2);
	target2 = MulAdd(d2, MF4x4(-0.04541847, -0.13645087, 0.14119779, 0.06409465, -0.29877988, -0.0009743694, 0.028256422, 0.14978185, -0.13014801, -0.24171488, -0.10782599, 0.010709664, 0.21880737, -0.34132662, 0.22972895, -0.07159475), target2);
	target2 = MulAdd(e2, MF4x4(-0.1510528, 0.115773134, 0.036761034, -0.284284, -0.35684052, 0.16348189, -0.105475456, 0.08259931, -0.6489164, -0.033928663, -0.04243186, 0.25324553, -0.31829014, 0.066608824, -0.11131264, 0.51919967), target2);
	target2 = MulAdd(f2, MF4x4(-0.06517726, 0.1933327, 0.044391852, -0.013346896, -0.3033368, 0.106350735, -0.1351003, -0.13414839, 0.11720078, -0.24844061, -0.2900742, -0.047861837, 0.42789885, -0.47915378, -0.09643217, -0.22915216), target2);
	target2 = MulAdd(g2, MF4x4(0.109821886, 0.31451595, 0.13300805, -0.08792569, -0.023928089, -0.038061168, 0.17821129, 0.003772247, 0.14684688, -0.12646271, 0.16072205, 0.011095222, 0.09209181, 0.005167038, -0.08823252, 0.079890974), target2);
	target2 = MulAdd(h2, MF4x4(-0.20074554, 0.39979288, -0.007316405, -0.047838025, 0.10849111, -0.22469573, -0.059183244, -0.13663793, 0.07881898, 0.105663374, -0.3152222, 0.08104766, -0.22965154, 0.118780024, -0.07886757, 0.073527716), target2);
	target2 = MulAdd(i2, MF4x4(0.1304303, 0.023158893, -0.081089824, -0.15955788, 0.42183343, -0.12898655, -0.14028409, 0.011985, 0.3977131, -0.313598, -0.148818, -0.048350018, -0.13534498, -0.12760727, -0.014968193, 0.06646305), target2);
	target2 = MulAdd(na1, MF4x4(0.18085147, -0.11859402, 0.117530234, -0.10420847, 0.1848264, -0.12192718, -0.18729533, -0.10098887, 0.011134682, -0.23658146, 0.12963286, 0.117404245, 0.054487415, -0.030003065, -0.32175776, -0.08044254), target2);
	target2 = MulAdd(nb1, MF4x4(-0.07251758, 0.073430285, -0.22191651, 0.030512359, -0.029650904, -0.15816379, 0.0418705, 0.04776615, -0.014070836, -0.14669086, -0.009874937, -0.015444495, -0.2747725, -0.061624944, -0.11261252, 0.14757589), target2);
	target2 = MulAdd(nc1, MF4x4(-0.09274913, 0.046194065, 0.05642919, -0.07803342, 0.23578037, 0.01224276, 0.015608659, 0.05847865, -0.091819406, -0.14424564, -0.034869857, 0.019276984, -0.031180726, -0.21905676, 0.100375675, -0.13659117), target2);
	target2 = MulAdd(nd1, MF4x4(-0.072157644, -0.13294607, 0.24301524, 0.048643183, -0.04338094, -0.0021709928, -0.06530963, -0.22672611, 0.07479903, 0.08388352, -0.07460508, -0.14517406, -0.072923675, -0.26912874, -0.2769797, 0.054033212), target2);
	target2 = MulAdd(ne1, MF4x4(-0.5648679, -0.28059873, -0.039906785, -0.39112374, -0.3841447, -0.20383365, 0.12607281, 0.16049421, -0.34394273, -0.022326993, 0.16646549, -0.23433913, 0.071224056, 0.048073303, 0.122035526, 0.14941359), target2);
	target2 = MulAdd(nf1, MF4x4(-0.11803124, 0.114169255, 0.018188128, 0.0053847185, -0.07537228, -0.048262373, 0.073838905, -0.041833423, 0.044405136, -0.03813592, 0.076818384, -0.06015139, -0.085042655, -0.14306667, -0.21477652, 0.31548396), target2);
	target2 = MulAdd(ng1, MF4x4(0.19307283, -0.014985916, -0.14332882, -0.05549754, 0.14551677, 0.11406769, 0.2744144, -0.031179624, 0.17578745, -0.11309805, 0.010072839, -0.07453384, -0.23163621, 0.19061968, 0.11016298, 0.108093746), target2);
	target2 = MulAdd(nh1, MF4x4(0.23180474, -0.12522835, -0.03218773, -0.0031955864, -0.14057393, 0.07269213, -0.20883523, 0.09332164, -0.16037942, 0.25845763, -0.002303125, -0.014625506, 0.17063208, -0.11648214, 0.13988028, -0.024688654), target2);
	target2 = MulAdd(ni1, MF4x4(0.043369994, 0.12473897, 0.108142346, 0.10268199, 0.16159926, -0.17804666, -0.007889351, 0.07232418, 0.26326916, 0.0474316, -0.41637155, -0.11879895, 0.14051722, 0.08747377, 0.1162202, -0.06443569), target2);
	target2 = MulAdd(na2, MF4x4(0.0041097966, 0.109841965, 0.097240336, 0.08123332, -0.081065506, 0.12650634, 0.23450434, 0.09631333, 0.21942414, -0.108897425, -0.033703003, 0.047280088, -0.017764917, -0.058596086, -0.15305139, 0.09055131), target2);
	target2 = MulAdd(nb2, MF4x4(0.26824722, 0.014116421, 0.11844865, -0.156046, 0.057152968, 0.21287468, -0.3243975, -0.18181354, -0.07131152, -0.17860547, 0.18918999, 0.15399154, 0.20270234, 0.11524436, 0.05146645, -0.18196748), target2);
	target2 = MulAdd(nc2, MF4x4(-0.2745638, -0.026905773, 0.045458756, 0.22942849, -0.21052304, 0.20649272, -0.03713028, 0.33655703, -0.12467089, -0.015030098, 0.15504798, -0.05647672, 0.18751477, 0.08505986, 0.04756538, -0.058810517), target2);
	target2 = MulAdd(nd2, MF4x4(0.1737789, 0.06552432, -0.34797582, -0.05370679, -0.036056817, 0.085242435, -0.12802805, 0.03710984, -0.09883285, 0.08946925, -0.0446528, 0.07734006, -0.10973603, 0.262812, 0.14010249, -0.1543792), target2);
	target2 = MulAdd(ne2, MF4x4(0.316673, -0.16414417, -0.23147403, -0.3080756, -0.056620106, -0.11389848, 0.0948114, 0.13236332, -0.40048537, -0.090742044, 0.12090404, 0.024549136, -0.19124876, -0.3007761, 0.16159211, -0.28620452), target2);
	target2 = MulAdd(nf2, MF4x4(0.032962102, -0.05481415, -0.1185786, 0.18153866, -0.2105442, -0.03802839, 0.14060515, 0.072460145, -0.1523761, -0.11426362, 0.02610123, -0.053477813, -0.20768824, 0.04533907, 0.14381588, -0.041578818), target2);
	target2 = MulAdd(ng2, MF4x4(-0.021694858, -0.028784249, -0.09928565, 0.07335764, 0.1315628, 0.11288982, 0.078681685, -0.1229723, -0.09618894, -0.07387309, 0.04340066, -0.036534667, 0.37295115, -0.08176548, -0.16579813, -0.13485877), target2);
	target2 = MulAdd(nh2, MF4x4(0.45979, -0.289226, -0.15456465, 0.0117592, 0.22803205, 0.15497394, -0.38995707, 0.005227681, -0.20515667, 0.17184737, -0.069968715, -0.24724679, -0.048521046, 0.013277072, 0.049562644, -0.05522196), target2);
	target2 = MulAdd(ni2, MF4x4(0.14561136, -0.18995416, 0.18104567, 0.063063085, -0.09728072, 0.018328888, -0.17258182, 0.069259025, 0.15187183, 0.16760696, -0.14086077, 0.013297849, -0.07579904, -0.09294852, -0.24227127, -0.048749007), target2);

	MF3 result = tex5.SampleLevel(sam, pos, 0).rgb;
	result = MulAdd(e1, MF4x3(0.023055293, 0.028219413, 0.024810018, 0.031653803, 0.050207954, 0.04504577, 0.03877294, 0.0280465, 0.025589157, 0.0019387804, 0.023891818, 0.016049948), result);
	result = MulAdd(e2, MF4x3(0.006562233, 0.03880659, 0.037682824, -0.021441424, -0.011277022, -0.012471097, -0.030526241, -0.013880651, -0.014213582, 0.0075785257, -0.0017350517, -0.0024610942), result);
	result = MulAdd(ne1, MF4x3(0.015097556, 0.020325955, 0.015611413, -0.014755199, -0.034323387, -0.032325987, -0.008603291, 0.010346807, 0.011044969, -0.004739154, -0.026397636, -0.01995132), result);
	result = MulAdd(ne2, MF4x3(0.0097906375, -0.015094543, -0.016887931, -0.0007786067, -0.0069163437, -0.008449091, 0.025534432, 0.018064791, 0.017047096, 0.00055667467, 0.001493328, 0.003636564), result);
	result = MulAdd(max(target1, 0), MF4x3(-0.042251963, -0.042396102, -0.040224236, -0.004492444, -0.0069470624, -0.0065821502, 0.062203273, 0.06213223, 0.053592753, 0.06424337, 0.07964681, 0.07316769), result);
	result = MulAdd(max(target2, 0), MF4x3(0.026366957, 0.02789826, 0.027239393, -0.006712127, -0.0035723334, -0.0032348586, -0.04960562, -0.062758155, -0.058574595, -0.02896146, -0.020999067, -0.021301663), result);
	result = MulAdd(max(-target1, 0), MF4x3(-0.013106142, -0.017057793, -0.014653614, -0.04254173, -0.043040022, -0.041918345, -0.011146975, -0.0043820064, -0.003768677, -0.0027743059, -0.0114479, -0.0082087545), result);
	result = MulAdd(max(-target2, 0), MF4x3(-0.10087762, -0.10447133, -0.1005168, -0.04165659, -0.04558967, -0.040086865, 0.0016493691, 0.0055392827, 0.0070476984, -0.018665023, -0.035552308, -0.03375731), result);
	result += MF3(0.018580848, -0.022256816, -0.0266178);
	result += INPUT.SampleLevel(sam, pos, 0).rgb;

	OUTPUT[gxy] = MF4(result, 1);
}
